From 886b39de557b4d5f54f5ca11559fca9799534280 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 10 Aug 2012 01:10:02 -0700 Subject: [PATCH 001/291] Add Python API. --- .../scala/spark/api/python/PythonRDD.scala | 147 +++++ pyspark/pyspark/__init__.py | 0 pyspark/pyspark/context.py | 69 +++ pyspark/pyspark/examples/__init__.py | 0 pyspark/pyspark/examples/kmeans.py | 56 ++ pyspark/pyspark/examples/pi.py | 20 + pyspark/pyspark/examples/tc.py | 49 ++ pyspark/pyspark/java_gateway.py | 20 + pyspark/pyspark/join.py | 104 ++++ pyspark/pyspark/rdd.py | 517 ++++++++++++++++++ pyspark/pyspark/serializers.py | 229 ++++++++ pyspark/pyspark/worker.py | 97 ++++ pyspark/requirements.txt | 9 + python/tc.py | 22 + 14 files changed, 1339 insertions(+) create mode 100644 core/src/main/scala/spark/api/python/PythonRDD.scala create mode 100644 pyspark/pyspark/__init__.py create mode 100644 pyspark/pyspark/context.py create mode 100644 pyspark/pyspark/examples/__init__.py create mode 100644 pyspark/pyspark/examples/kmeans.py create mode 100644 pyspark/pyspark/examples/pi.py create mode 100644 pyspark/pyspark/examples/tc.py create mode 100644 pyspark/pyspark/java_gateway.py create mode 100644 pyspark/pyspark/join.py create mode 100644 pyspark/pyspark/rdd.py create mode 100644 pyspark/pyspark/serializers.py create mode 100644 pyspark/pyspark/worker.py create mode 100644 pyspark/requirements.txt create mode 100644 python/tc.py diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala new file mode 100644 index 0000000000..660ad48afe --- /dev/null +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -0,0 +1,147 @@ +package spark.api.python + +import java.io.PrintWriter + +import scala.collection.Map +import scala.collection.JavaConversions._ +import scala.io.Source +import spark._ +import api.java.{JavaPairRDD, JavaRDD} +import scala.Some + +trait PythonRDDBase { + def compute[T](split: Split, envVars: Map[String, String], + command: Seq[String], parent: RDD[T], pythonExec: String): Iterator[String]= { + val currentEnvVars = new ProcessBuilder().environment() + val SPARK_HOME = currentEnvVars.get("SPARK_HOME") + + val pb = new ProcessBuilder(Seq(pythonExec, SPARK_HOME + "/pyspark/pyspark/worker.py")) + // Add the environmental variables to the process. 
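+    // The launched worker (pyspark/pyspark/worker.py) reads a command and any
+    // base64-pickled functions from stdin, then one serialized element per
+    // line, and writes its results back one line at a time on stdout; the
+    // threads below feed its stdin and drain its stderr.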
+ envVars.foreach { + case (variable, value) => currentEnvVars.put(variable, value) + } + + val proc = pb.start() + val env = SparkEnv.get + + // Start a thread to print the process's stderr to ours + new Thread("stderr reader for " + command) { + override def run() { + for (line <- Source.fromInputStream(proc.getErrorStream).getLines) { + System.err.println(line) + } + } + }.start() + + // Start a thread to feed the process input from our parent's iterator + new Thread("stdin writer for " + command) { + override def run() { + SparkEnv.set(env) + val out = new PrintWriter(proc.getOutputStream) + for (elem <- command) { + out.println(elem) + } + for (elem <- parent.iterator(split)) { + out.println(PythonRDD.pythonDump(elem)) + } + out.close() + } + }.start() + + // Return an iterator that read lines from the process's stdout + val lines: Iterator[String] = Source.fromInputStream(proc.getInputStream).getLines + wrapIterator(lines, proc) + } + + def wrapIterator[T](iter: Iterator[T], proc: Process): Iterator[T] = { + return new Iterator[T] { + def next() = iter.next() + + def hasNext = { + if (iter.hasNext) { + true + } else { + val exitStatus = proc.waitFor() + if (exitStatus != 0) { + throw new Exception("Subprocess exited with status " + exitStatus) + } + false + } + } + } + } +} + +class PythonRDD[T: ClassManifest]( + parent: RDD[T], command: Seq[String], envVars: Map[String, String], + preservePartitoning: Boolean, pythonExec: String) + extends RDD[String](parent.context) with PythonRDDBase { + + def this(parent: RDD[T], command: Seq[String], preservePartitoning: Boolean, pythonExec: String) = + this(parent, command, Map(), preservePartitoning, pythonExec) + + // Similar to Runtime.exec(), if we are given a single string, split it into words + // using a standard StringTokenizer (i.e. by spaces) + def this(parent: RDD[T], command: String, preservePartitoning: Boolean, pythonExec: String) = + this(parent, PipedRDD.tokenize(command), preservePartitoning, pythonExec) + + override def splits = parent.splits + + override val dependencies = List(new OneToOneDependency(parent)) + + override val partitioner = if (preservePartitoning) parent.partitioner else None + + override def compute(split: Split): Iterator[String] = + compute(split, envVars, command, parent, pythonExec) + + val asJavaRDD : JavaRDD[String] = JavaRDD.fromRDD(this) +} + +class PythonPairRDD[T: ClassManifest] ( + parent: RDD[T], command: Seq[String], envVars: Map[String, String], + preservePartitoning: Boolean, pythonExec: String) + extends RDD[(String, String)](parent.context) with PythonRDDBase { + + def this(parent: RDD[T], command: Seq[String], preservePartitoning: Boolean, pythonExec: String) = + this(parent, command, Map(), preservePartitoning, pythonExec) + + // Similar to Runtime.exec(), if we are given a single string, split it into words + // using a standard StringTokenizer (i.e. 
by spaces) + def this(parent: RDD[T], command: String, preservePartitoning: Boolean, pythonExec: String) = + this(parent, PipedRDD.tokenize(command), preservePartitoning, pythonExec) + + override def splits = parent.splits + + override val dependencies = List(new OneToOneDependency(parent)) + + override val partitioner = if (preservePartitoning) parent.partitioner else None + + override def compute(split: Split): Iterator[(String, String)] = { + compute(split, envVars, command, parent, pythonExec).grouped(2).map { + case Seq(a, b) => (a, b) + case x => throw new Exception("Unexpected value: " + x) + } + } + + val asJavaPairRDD : JavaPairRDD[String, String] = JavaPairRDD.fromRDD(this) +} + +object PythonRDD { + def pythonDump[T](x: T): String = { + if (x.isInstanceOf[scala.Option[_]]) { + val t = x.asInstanceOf[scala.Option[_]] + t match { + case None => "*" + case Some(z) => pythonDump(z) + } + } else if (x.isInstanceOf[scala.Tuple2[_, _]]) { + val t = x.asInstanceOf[scala.Tuple2[_, _]] + "(" + pythonDump(t._1) + "," + pythonDump(t._2) + ")" + } else if (x.isInstanceOf[java.util.List[_]]) { + val objs = asScalaBuffer(x.asInstanceOf[java.util.List[_]]).map(pythonDump) + "[" + objs.mkString("|") + "]" + } else { + x.toString + } + } +} diff --git a/pyspark/pyspark/__init__.py b/pyspark/pyspark/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pyspark/pyspark/context.py b/pyspark/pyspark/context.py new file mode 100644 index 0000000000..587ab12b5f --- /dev/null +++ b/pyspark/pyspark/context.py @@ -0,0 +1,69 @@ +import os +import atexit +from tempfile import NamedTemporaryFile + +from pyspark.java_gateway import launch_gateway +from pyspark.serializers import JSONSerializer, NopSerializer +from pyspark.rdd import RDD, PairRDD + + +class SparkContext(object): + + gateway = launch_gateway() + jvm = gateway.jvm + python_dump = jvm.spark.api.python.PythonRDD.pythonDump + + def __init__(self, master, name, defaultSerializer=JSONSerializer, + defaultParallelism=None, pythonExec='python'): + self.master = master + self.name = name + self._jsc = self.jvm.JavaSparkContext(master, name) + self.defaultSerializer = defaultSerializer + self.defaultParallelism = \ + defaultParallelism or self._jsc.sc().defaultParallelism() + self.pythonExec = pythonExec + + def __del__(self): + if self._jsc: + self._jsc.stop() + + def stop(self): + self._jsc.stop() + self._jsc = None + + def parallelize(self, c, numSlices=None, serializer=None): + serializer = serializer or self.defaultSerializer + numSlices = numSlices or self.defaultParallelism + # Calling the Java parallelize() method with an ArrayList is too slow, + # because it sends O(n) Py4J commands. As an alternative, serialized + # objects are written to a file and loaded through textFile(). 
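+        # For example, parallelize([1, 2, 3]) with the default JSONSerializer
+        # writes three lines to a temporary file, one base64-encoded JSON
+        # value per line; the file is deleted at interpreter exit.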
+ tempFile = NamedTemporaryFile(delete=False) + tempFile.writelines(serializer.dumps(x) + '\n' for x in c) + tempFile.close() + atexit.register(lambda: os.unlink(tempFile.name)) + return self.textFile(tempFile.name, numSlices, serializer) + + def parallelizePairs(self, c, numSlices=None, keySerializer=None, + valSerializer=None): + """ + >>> sc = SparkContext("local", "test") + >>> rdd = sc.parallelizePairs([(1, 2), (3, 4)]) + >>> rdd.collect() + [(1, 2), (3, 4)] + """ + keySerializer = keySerializer or self.defaultSerializer + valSerializer = valSerializer or self.defaultSerializer + numSlices = numSlices or self.defaultParallelism + tempFile = NamedTemporaryFile(delete=False) + for (k, v) in c: + tempFile.write(keySerializer.dumps(k).rstrip('\r\n') + '\n') + tempFile.write(valSerializer.dumps(v).rstrip('\r\n') + '\n') + tempFile.close() + atexit.register(lambda: os.unlink(tempFile.name)) + jrdd = self.textFile(tempFile.name, numSlices)._pipePairs([], "echo") + return PairRDD(jrdd, self, keySerializer, valSerializer) + + def textFile(self, name, numSlices=None, serializer=NopSerializer): + numSlices = numSlices or self.defaultParallelism + jrdd = self._jsc.textFile(name, numSlices) + return RDD(jrdd, self, serializer) diff --git a/pyspark/pyspark/examples/__init__.py b/pyspark/pyspark/examples/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pyspark/pyspark/examples/kmeans.py b/pyspark/pyspark/examples/kmeans.py new file mode 100644 index 0000000000..0761d6e395 --- /dev/null +++ b/pyspark/pyspark/examples/kmeans.py @@ -0,0 +1,56 @@ +import sys + +from pyspark.context import SparkContext + + +def parseVector(line): + return [float(x) for x in line.split(' ')] + + +def addVec(x, y): + return [a + b for (a, b) in zip(x, y)] + + +def squaredDist(x, y): + return sum((a - b) ** 2 for (a, b) in zip(x, y)) + + +def closestPoint(p, centers): + bestIndex = 0 + closest = float("+inf") + for i in range(len(centers)): + tempDist = squaredDist(p, centers[i]) + if tempDist < closest: + closest = tempDist + bestIndex = i + return bestIndex + + +if __name__ == "__main__": + if len(sys.argv) < 5: + print >> sys.stderr, \ + "Usage: PythonKMeans " + exit(-1) + sc = SparkContext(sys.argv[1], "PythonKMeans") + lines = sc.textFile(sys.argv[2]) + data = lines.map(parseVector).cache() + K = int(sys.argv[3]) + convergeDist = float(sys.argv[4]) + + kPoints = data.takeSample(False, K, 34) + tempDist = 1.0 + + while tempDist > convergeDist: + closest = data.mapPairs( + lambda p : (closestPoint(p, kPoints), (p, 1))) + pointStats = closest.reduceByKey( + lambda (x1, y1), (x2, y2): (addVec(x1, x2), y1 + y2)) + newPoints = pointStats.mapPairs( + lambda (x, (y, z)): (x, [a / z for a in y])).collect() + + tempDist = sum(squaredDist(kPoints[x], y) for (x, y) in newPoints) + + for (x, y) in newPoints: + kPoints[x] = y + + print "Final centers: " + str(kPoints) diff --git a/pyspark/pyspark/examples/pi.py b/pyspark/pyspark/examples/pi.py new file mode 100644 index 0000000000..ad77694c41 --- /dev/null +++ b/pyspark/pyspark/examples/pi.py @@ -0,0 +1,20 @@ +import sys +from random import random +from operator import add +from pyspark.context import SparkContext + + +if __name__ == "__main__": + if len(sys.argv) == 1: + print >> sys.stderr, \ + "Usage: PythonPi []" + exit(-1) + sc = SparkContext(sys.argv[1], "PythonKMeans") + slices = sys.argv[2] if len(sys.argv) > 2 else 2 + n = 100000 * slices + def f(_): + x = random() * 2 - 1 + y = random() * 2 - 1 + return 1 if x ** 2 + y ** 2 < 1 else 0 + count = 
sc.parallelize(xrange(1, n+1), slices).map(f).reduce(add) + print "Pi is roughly %f" % (4.0 * count / n) diff --git a/pyspark/pyspark/examples/tc.py b/pyspark/pyspark/examples/tc.py new file mode 100644 index 0000000000..2796fdc6ad --- /dev/null +++ b/pyspark/pyspark/examples/tc.py @@ -0,0 +1,49 @@ +import sys +from random import Random +from pyspark.context import SparkContext + +numEdges = 200 +numVertices = 100 +rand = Random(42) + + +def generateGraph(): + edges = set() + while len(edges) < numEdges: + src = rand.randrange(0, numEdges) + dst = rand.randrange(0, numEdges) + if src != dst: + edges.add((src, dst)) + return edges + + +if __name__ == "__main__": + if len(sys.argv) == 1: + print >> sys.stderr, \ + "Usage: PythonTC []" + exit(-1) + sc = SparkContext(sys.argv[1], "PythonKMeans") + slices = sys.argv[2] if len(sys.argv) > 2 else 2 + tc = sc.parallelizePairs(generateGraph(), slices).cache() + + # Linear transitive closure: each round grows paths by one edge, + # by joining the graph's edges with the already-discovered paths. + # e.g. join the path (y, z) from the TC with the edge (x, y) from + # the graph to obtain the path (x, z). + + # Because join() joins on keys, the edges are stored in reversed order. + edges = tc.mapPairs(lambda (x, y): (y, x)) + + oldCount = 0L + nextCount = tc.count() + while True: + oldCount = nextCount + # Perform the join, obtaining an RDD of (y, (z, x)) pairs, + # then project the result to obtain the new (x, z) paths. + new_edges = tc.join(edges).mapPairs(lambda (_, (a, b)): (b, a)) + tc = tc.union(new_edges).distinct().cache() + nextCount = tc.count() + if nextCount == oldCount: + break + + print "TC has %i edges" % tc.count() diff --git a/pyspark/pyspark/java_gateway.py b/pyspark/pyspark/java_gateway.py new file mode 100644 index 0000000000..2df80aee85 --- /dev/null +++ b/pyspark/pyspark/java_gateway.py @@ -0,0 +1,20 @@ +import glob +import os +from py4j.java_gateway import java_import, JavaGateway + + +SPARK_HOME = os.environ["SPARK_HOME"] + + +assembly_jar = glob.glob(os.path.join(SPARK_HOME, "core/target") + \ + "/spark-core-assembly-*-SNAPSHOT.jar")[0] + + +def launch_gateway(): + gateway = JavaGateway.launch_gateway(classpath=assembly_jar, + javaopts=["-Xmx256m"], die_on_exit=True) + java_import(gateway.jvm, "spark.api.java.*") + java_import(gateway.jvm, "spark.api.python.*") + java_import(gateway.jvm, "scala.Tuple2") + java_import(gateway.jvm, "spark.api.python.PythonRDD.pythonDump") + return gateway diff --git a/pyspark/pyspark/join.py b/pyspark/pyspark/join.py new file mode 100644 index 0000000000..c67520fce8 --- /dev/null +++ b/pyspark/pyspark/join.py @@ -0,0 +1,104 @@ +""" +Copyright (c) 2011, Douban Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + + * Neither the name of the Douban Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" +from pyspark.serializers import PairSerializer, OptionSerializer, \ + ArraySerializer + + +def _do_python_join(rdd, other, numSplits, dispatch, valSerializer): + vs = rdd.mapPairs(lambda (k, v): (k, (1, v))) + ws = other.mapPairs(lambda (k, v): (k, (2, v))) + return vs.union(ws).groupByKey(numSplits) \ + .flatMapValues(dispatch, valSerializer) + + +def python_join(rdd, other, numSplits): + def dispatch(seq): + vbuf, wbuf = [], [] + for (n, v) in seq: + if n == 1: + vbuf.append(v) + elif n == 2: + wbuf.append(v) + return [(v, w) for v in vbuf for w in wbuf] + valSerializer = PairSerializer(rdd.valSerializer, other.valSerializer) + return _do_python_join(rdd, other, numSplits, dispatch, valSerializer) + + +def python_right_outer_join(rdd, other, numSplits): + def dispatch(seq): + vbuf, wbuf = [], [] + for (n, v) in seq: + if n == 1: + vbuf.append(v) + elif n == 2: + wbuf.append(v) + if not vbuf: + vbuf.append(None) + return [(v, w) for v in vbuf for w in wbuf] + valSerializer = PairSerializer(OptionSerializer(rdd.valSerializer), + other.valSerializer) + return _do_python_join(rdd, other, numSplits, dispatch, valSerializer) + + +def python_left_outer_join(rdd, other, numSplits): + def dispatch(seq): + vbuf, wbuf = [], [] + for (n, v) in seq: + if n == 1: + vbuf.append(v) + elif n == 2: + wbuf.append(v) + if not wbuf: + wbuf.append(None) + return [(v, w) for v in vbuf for w in wbuf] + valSerializer = PairSerializer(rdd.valSerializer, + OptionSerializer(other.valSerializer)) + return _do_python_join(rdd, other, numSplits, dispatch, valSerializer) + + +def python_cogroup(rdd, other, numSplits): + resultValSerializer = PairSerializer( + ArraySerializer(rdd.valSerializer), + ArraySerializer(other.valSerializer)) + vs = rdd.mapPairs(lambda (k, v): (k, (1, v))) + ws = other.mapPairs(lambda (k, v): (k, (2, v))) + def dispatch(seq): + vbuf, wbuf = [], [] + for (n, v) in seq: + if n == 1: + vbuf.append(v) + elif n == 2: + wbuf.append(v) + return (vbuf, wbuf) + return vs.union(ws).groupByKey(numSplits) \ + .mapValues(dispatch, resultValSerializer) diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py new file mode 100644 index 0000000000..c892e86b93 --- /dev/null +++ b/pyspark/pyspark/rdd.py @@ -0,0 +1,517 @@ +from base64 import standard_b64encode as b64enc +from cloud.serialization import cloudpickle +from itertools import chain + +from pyspark.serializers import PairSerializer, NopSerializer, \ + OptionSerializer, ArraySerializer +from pyspark.join import python_join, python_left_outer_join, \ + python_right_outer_join, python_cogroup + + +class RDD(object): + + def __init__(self, jrdd, ctx, serializer=None): + self._jrdd = jrdd + self.is_cached = False + self.ctx = ctx + self.serializer = serializer or ctx.defaultSerializer + + def 
_builder(self, jrdd, ctx): + return RDD(jrdd, ctx, self.serializer) + + @property + def id(self): + return self._jrdd.id() + + @property + def splits(self): + return self._jrdd.splits() + + @classmethod + def _get_pipe_command(cls, command, functions): + if functions and not isinstance(functions, (list, tuple)): + functions = [functions] + worker_args = [command] + for f in functions: + worker_args.append(b64enc(cloudpickle.dumps(f))) + return " ".join(worker_args) + + def cache(self): + self.is_cached = True + self._jrdd.cache() + return self + + def map(self, f, serializer=None, preservesPartitioning=False): + return MappedRDD(self, f, serializer, preservesPartitioning) + + def mapPairs(self, f, keySerializer=None, valSerializer=None, + preservesPartitioning=False): + return PairMappedRDD(self, f, keySerializer, valSerializer, + preservesPartitioning) + + def flatMap(self, f, serializer=None): + """ + >>> rdd = sc.parallelize([2, 3, 4]) + >>> sorted(rdd.flatMap(lambda x: range(1, x)).collect()) + [1, 1, 1, 2, 2, 3] + """ + serializer = serializer or self.ctx.defaultSerializer + dumps = serializer.dumps + loads = self.serializer.loads + def func(x): + pickled_elems = (dumps(y) for y in f(loads(x))) + return "\n".join(pickled_elems) or None + pipe_command = RDD._get_pipe_command("map", [func]) + class_manifest = self._jrdd.classManifest() + jrdd = self.ctx.jvm.PythonRDD(self._jrdd.rdd(), pipe_command, + False, self.ctx.pythonExec, + class_manifest).asJavaRDD() + return RDD(jrdd, self.ctx, serializer) + + def flatMapPairs(self, f, keySerializer=None, valSerializer=None, + preservesPartitioning=False): + """ + >>> rdd = sc.parallelize([2, 3, 4]) + >>> sorted(rdd.flatMapPairs(lambda x: [(x, x), (x, x)]).collect()) + [(2, 2), (2, 2), (3, 3), (3, 3), (4, 4), (4, 4)] + """ + keySerializer = keySerializer or self.ctx.defaultSerializer + valSerializer = valSerializer or self.ctx.defaultSerializer + dumpk = keySerializer.dumps + dumpv = valSerializer.dumps + loads = self.serializer.loads + def func(x): + pairs = f(loads(x)) + pickled_pairs = ((dumpk(k), dumpv(v)) for (k, v) in pairs) + return "\n".join(chain.from_iterable(pickled_pairs)) or None + pipe_command = RDD._get_pipe_command("map", [func]) + class_manifest = self._jrdd.classManifest() + python_rdd = self.ctx.jvm.PythonPairRDD(self._jrdd.rdd(), pipe_command, + preservesPartitioning, self.ctx.pythonExec, class_manifest) + return PairRDD(python_rdd.asJavaPairRDD(), self.ctx, keySerializer, + valSerializer) + + def filter(self, f): + """ + >>> rdd = sc.parallelize([1, 2, 3, 4, 5]) + >>> rdd.filter(lambda x: x % 2 == 0).collect() + [2, 4] + """ + loads = self.serializer.loads + def filter_func(x): return x if f(loads(x)) else None + return self._builder(self._pipe(filter_func), self.ctx) + + def _pipe(self, functions, command="map"): + class_manifest = self._jrdd.classManifest() + pipe_command = RDD._get_pipe_command(command, functions) + python_rdd = self.ctx.jvm.PythonRDD(self._jrdd.rdd(), pipe_command, + False, self.ctx.pythonExec, class_manifest) + return python_rdd.asJavaRDD() + + def _pipePairs(self, functions, command="mapPairs", + preservesPartitioning=False): + class_manifest = self._jrdd.classManifest() + pipe_command = RDD._get_pipe_command(command, functions) + python_rdd = self.ctx.jvm.PythonPairRDD(self._jrdd.rdd(), pipe_command, + preservesPartitioning, self.ctx.pythonExec, class_manifest) + return python_rdd.asJavaPairRDD() + + def distinct(self): + """ + >>> sorted(sc.parallelize([1, 1, 2, 3]).distinct().collect()) + [1, 2, 3] 
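+
+        When the serializer is not comparable, this falls back to a
+        reduceByKey()-based implementation instead of the Java distinct().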
+ """ + if self.serializer.is_comparable: + return self._builder(self._jrdd.distinct(), self.ctx) + return self.mapPairs(lambda x: (x, "")) \ + .reduceByKey(lambda x, _: x) \ + .map(lambda (x, _): x) + + def sample(self, withReplacement, fraction, seed): + jrdd = self._jrdd.sample(withReplacement, fraction, seed) + return self._builder(jrdd, self.ctx) + + def takeSample(self, withReplacement, num, seed): + vals = self._jrdd.takeSample(withReplacement, num, seed) + return [self.serializer.loads(self.ctx.python_dump(x)) for x in vals] + + def union(self, other): + """ + >>> rdd = sc.parallelize([1, 1, 2, 3]) + >>> rdd.union(rdd).collect() + [1, 1, 2, 3, 1, 1, 2, 3] + """ + return self._builder(self._jrdd.union(other._jrdd), self.ctx) + + # TODO: sort + + # TODO: Overload __add___? + + # TODO: glom + + def cartesian(self, other): + """ + >>> rdd = sc.parallelize([1, 2]) + >>> sorted(rdd.cartesian(rdd).collect()) + [(1, 1), (1, 2), (2, 1), (2, 2)] + """ + return PairRDD(self._jrdd.cartesian(other._jrdd), self.ctx) + + # numsplits + def groupBy(self, f, numSplits=None): + """ + >>> rdd = sc.parallelize([1, 1, 2, 3, 5, 8]) + >>> sorted(rdd.groupBy(lambda x: x % 2).collect()) + [(0, [2, 8]), (1, [1, 1, 3, 5])] + """ + return self.mapPairs(lambda x: (f(x), x)).groupByKey(numSplits) + + # TODO: pipe + + # TODO: mapPartitions + + def foreach(self, f): + """ + >>> def f(x): print x + >>> sc.parallelize([1, 2, 3, 4, 5]).foreach(f) + """ + self.map(f).collect() # Force evaluation + + def collect(self): + vals = self._jrdd.collect() + return [self.serializer.loads(self.ctx.python_dump(x)) for x in vals] + + def reduce(self, f, serializer=None): + """ + >>> import operator + >>> sc.parallelize([1, 2, 3, 4, 5]).reduce(operator.add) + 15 + """ + serializer = serializer or self.ctx.defaultSerializer + loads = self.serializer.loads + dumps = serializer.dumps + def reduceFunction(x, acc): + if acc is None: + return loads(x) + else: + return f(loads(x), acc) + vals = self._pipe([reduceFunction, dumps], command="reduce").collect() + return reduce(f, (serializer.loads(x) for x in vals)) + + # TODO: fold + + # TODO: aggregate + + def count(self): + """ + >>> sc.parallelize([2, 3, 4]).count() + 3L + """ + return self._jrdd.count() + + # TODO: count approx methods + + def take(self, num): + """ + >>> sc.parallelize([2, 3, 4]).take(2) + [2, 3] + """ + vals = self._jrdd.take(num) + return [self.serializer.loads(self.ctx.python_dump(x)) for x in vals] + + def first(self): + """ + >>> sc.parallelize([2, 3, 4]).first() + 2 + """ + return self.serializer.loads(self.ctx.python_dump(self._jrdd.first())) + + # TODO: saveAsTextFile + + # TODO: saveAsObjectFile + + +class PairRDD(RDD): + + def __init__(self, jrdd, ctx, keySerializer=None, valSerializer=None): + RDD.__init__(self, jrdd, ctx) + self.keySerializer = keySerializer or ctx.defaultSerializer + self.valSerializer = valSerializer or ctx.defaultSerializer + self.serializer = \ + PairSerializer(self.keySerializer, self.valSerializer) + + def _builder(self, jrdd, ctx): + return PairRDD(jrdd, ctx, self.keySerializer, self.valSerializer) + + def reduceByKey(self, func, numSplits=None): + """ + >>> x = sc.parallelizePairs([("a", 1), ("b", 1), ("a", 1)]) + >>> sorted(x.reduceByKey(lambda a, b: a + b).collect()) + [('a', 2), ('b', 1)] + """ + return self.combineByKey(lambda x: x, func, func, numSplits) + + # TODO: reduceByKeyLocally() + + # TODO: countByKey() + + # TODO: partitionBy + + def join(self, other, numSplits=None): + """ + >>> x = sc.parallelizePairs([("a", 1), 
("b", 4)]) + >>> y = sc.parallelizePairs([("a", 2), ("a", 3)]) + >>> x.join(y).collect() + [('a', (1, 2)), ('a', (1, 3))] + + Check that we get a PairRDD-like object back: + >>> assert x.join(y).join + """ + assert self.keySerializer.name == other.keySerializer.name + if self.keySerializer.is_comparable: + return PairRDD(self._jrdd.join(other._jrdd), + self.ctx, self.keySerializer, + PairSerializer(self.valSerializer, other.valSerializer)) + else: + return python_join(self, other, numSplits) + + def leftOuterJoin(self, other, numSplits=None): + """ + >>> x = sc.parallelizePairs([("a", 1), ("b", 4)]) + >>> y = sc.parallelizePairs([("a", 2)]) + >>> sorted(x.leftOuterJoin(y).collect()) + [('a', (1, 2)), ('b', (4, None))] + """ + assert self.keySerializer.name == other.keySerializer.name + if self.keySerializer.is_comparable: + return PairRDD(self._jrdd.leftOuterJoin(other._jrdd), + self.ctx, self.keySerializer, + PairSerializer(self.valSerializer, + OptionSerializer(other.valSerializer))) + else: + return python_left_outer_join(self, other, numSplits) + + def rightOuterJoin(self, other, numSplits=None): + """ + >>> x = sc.parallelizePairs([("a", 1), ("b", 4)]) + >>> y = sc.parallelizePairs([("a", 2)]) + >>> sorted(y.rightOuterJoin(x).collect()) + [('a', (2, 1)), ('b', (None, 4))] + """ + assert self.keySerializer.name == other.keySerializer.name + if self.keySerializer.is_comparable: + return PairRDD(self._jrdd.rightOuterJoin(other._jrdd), + self.ctx, self.keySerializer, + PairSerializer(OptionSerializer(self.valSerializer), + other.valSerializer)) + else: + return python_right_outer_join(self, other, numSplits) + + def combineByKey(self, createCombiner, mergeValue, mergeCombiners, + numSplits=None, serializer=None): + """ + >>> x = sc.parallelizePairs([("a", 1), ("b", 1), ("a", 1)]) + >>> def f(x): return x + >>> def add(a, b): return a + str(b) + >>> sorted(x.combineByKey(str, add, add).collect()) + [('a', '11'), ('b', '1')] + """ + serializer = serializer or self.ctx.defaultSerializer + if numSplits is None: + numSplits = self.ctx.defaultParallelism + # Use hash() to create keys that are comparable in Java. + loadkv = self.serializer.loads + def pairify(kv): + # TODO: add method to deserialize only the key or value from + # a PairSerializer? 
+ key = loadkv(kv)[0] + return (str(hash(key)), kv) + partitioner = self.ctx.jvm.spark.HashPartitioner(numSplits) + jrdd = self._pipePairs(pairify).partitionBy(partitioner) + pairified = PairRDD(jrdd, self.ctx, NopSerializer, self.serializer) + + loads = PairSerializer(NopSerializer, self.serializer).loads + dumpk = self.keySerializer.dumps + dumpc = serializer.dumps + + functions = [createCombiner, mergeValue, mergeCombiners, loads, dumpk, + dumpc] + jpairs = pairified._pipePairs(functions, "combine_by_key", + preservesPartitioning=True) + return PairRDD(jpairs, self.ctx, self.keySerializer, serializer) + + def groupByKey(self, numSplits=None): + """ + >>> x = sc.parallelizePairs([("a", 1), ("b", 1), ("a", 1)]) + >>> sorted(x.groupByKey().collect()) + [('a', [1, 1]), ('b', [1])] + """ + + def createCombiner(x): + return [x] + + def mergeValue(xs, x): + xs.append(x) + return xs + + def mergeCombiners(a, b): + return a + b + + return self.combineByKey(createCombiner, mergeValue, mergeCombiners, + numSplits) + + def collectAsMap(self): + """ + >>> m = sc.parallelizePairs([(1, 2), (3, 4)]).collectAsMap() + >>> m[1] + 2 + >>> m[3] + 4 + """ + m = self._jrdd.collectAsMap() + def loads(x): + (k, v) = x + return (self.keySerializer.loads(k), self.valSerializer.loads(v)) + return dict(loads(x) for x in m.items()) + + def flatMapValues(self, f, valSerializer=None): + flat_map_fn = lambda (k, v): ((k, x) for x in f(v)) + return self.flatMapPairs(flat_map_fn, self.keySerializer, + valSerializer, True) + + def mapValues(self, f, valSerializer=None): + map_values_fn = lambda (k, v): (k, f(v)) + return self.mapPairs(map_values_fn, self.keySerializer, valSerializer, + True) + + # TODO: support varargs cogroup of several RDDs. + def groupWith(self, other): + return self.cogroup(other) + + def cogroup(self, other, numSplits=None): + """ + >>> x = sc.parallelizePairs([("a", 1), ("b", 4)]) + >>> y = sc.parallelizePairs([("a", 2)]) + >>> x.cogroup(y).collect() + [('a', ([1], [2])), ('b', ([4], []))] + """ + assert self.keySerializer.name == other.keySerializer.name + resultValSerializer = PairSerializer( + ArraySerializer(self.valSerializer), + ArraySerializer(other.valSerializer)) + if self.keySerializer.is_comparable: + return PairRDD(self._jrdd.cogroup(other._jrdd), + self.ctx, self.keySerializer, resultValSerializer) + else: + return python_cogroup(self, other, numSplits) + + # TODO: `lookup` is disabled because we can't make direct comparisons based + # on the key; we need to compare the hash of the key to the hash of the + # keys in the pairs. This could be an expensive operation, since those + # hashes aren't retained. 
+ + # TODO: file saving + + +class MappedRDDBase(object): + def __init__(self, prev, func, serializer, preservesPartitioning=False): + if isinstance(prev, MappedRDDBase) and not prev.is_cached: + prev_func = prev.func + self.func = lambda x: func(prev_func(x)) + self.preservesPartitioning = \ + prev.preservesPartitioning and preservesPartitioning + self._prev_jrdd = prev._prev_jrdd + self._prev_serializer = prev._prev_serializer + else: + self.func = func + self.preservesPartitioning = preservesPartitioning + self._prev_jrdd = prev._jrdd + self._prev_serializer = prev.serializer + self.serializer = serializer or prev.ctx.defaultSerializer + self.is_cached = False + self.ctx = prev.ctx + self.prev = prev + self._jrdd_val = None + + +class MappedRDD(MappedRDDBase, RDD): + """ + >>> rdd = sc.parallelize([1, 2, 3, 4]) + >>> rdd.map(lambda x: 2 * x).cache().map(lambda x: 2 * x).collect() + [4, 8, 12, 16] + >>> rdd.map(lambda x: 2 * x).map(lambda x: 2 * x).collect() + [4, 8, 12, 16] + """ + + @property + def _jrdd(self): + if not self._jrdd_val: + udf = self.func + loads = self._prev_serializer.loads + dumps = self.serializer.dumps + func = lambda x: dumps(udf(loads(x))) + pipe_command = RDD._get_pipe_command("map", [func]) + class_manifest = self._prev_jrdd.classManifest() + python_rdd = self.ctx.jvm.PythonRDD(self._prev_jrdd.rdd(), + pipe_command, self.preservesPartitioning, self.ctx.pythonExec, + class_manifest) + self._jrdd_val = python_rdd.asJavaRDD() + return self._jrdd_val + + +class PairMappedRDD(MappedRDDBase, PairRDD): + """ + >>> rdd = sc.parallelize([1, 2, 3, 4]) + >>> rdd.mapPairs(lambda x: (x, x)) \\ + ... .mapPairs(lambda (x, y): (2*x, 2*y)) \\ + ... .collect() + [(2, 2), (4, 4), (6, 6), (8, 8)] + >>> rdd.mapPairs(lambda x: (x, x)) \\ + ... .mapPairs(lambda (x, y): (2*x, 2*y)) \\ + ... .map(lambda (x, _): x).collect() + [2, 4, 6, 8] + """ + + def __init__(self, prev, func, keySerializer=None, valSerializer=None, + preservesPartitioning=False): + self.keySerializer = keySerializer or prev.ctx.defaultSerializer + self.valSerializer = valSerializer or prev.ctx.defaultSerializer + serializer = PairSerializer(self.keySerializer, self.valSerializer) + MappedRDDBase.__init__(self, prev, func, serializer, + preservesPartitioning) + + @property + def _jrdd(self): + if not self._jrdd_val: + udf = self.func + loads = self._prev_serializer.loads + dumpk = self.keySerializer.dumps + dumpv = self.valSerializer.dumps + def func(x): + (k, v) = udf(loads(x)) + return (dumpk(k), dumpv(v)) + pipe_command = RDD._get_pipe_command("mapPairs", [func]) + class_manifest = self._prev_jrdd.classManifest() + self._jrdd_val = self.ctx.jvm.PythonPairRDD(self._prev_jrdd.rdd(), + pipe_command, self.preservesPartitioning, self.ctx.pythonExec, + class_manifest).asJavaPairRDD() + return self._jrdd_val + + +def _test(): + import doctest + from pyspark.context import SparkContext + from pyspark.serializers import PickleSerializer, JSONSerializer + globs = globals().copy() + globs['sc'] = SparkContext('local', 'PythonTest', + defaultSerializer=JSONSerializer) + doctest.testmod(globs=globs) + globs['sc'].stop() + globs['sc'] = SparkContext('local', 'PythonTest', + defaultSerializer=PickleSerializer) + doctest.testmod(globs=globs) + globs['sc'].stop() + + +if __name__ == "__main__": + _test() diff --git a/pyspark/pyspark/serializers.py b/pyspark/pyspark/serializers.py new file mode 100644 index 0000000000..b113f5656b --- /dev/null +++ b/pyspark/pyspark/serializers.py @@ -0,0 +1,229 @@ +""" +Data serialization methods. 
+ +The Spark Python API is built on top of the Spark Java API. RDDs created in +Python are stored in Java as RDDs of Strings. Python objects are automatically +serialized/deserialized, so this representation is transparent to the end-user. + +------------------ +Serializer objects +------------------ + +`Serializer` objects are used to customize how an RDD's values are serialized. + +Each `Serializer` is a named tuple with four fields: + + - A `dumps` function, for serializing a Python object to a string. + + - A `loads` function, for deserializing a Python object from a string. + + - An `is_comparable` field, True if equal Python objects are serialized to + equal strings, and False otherwise. + + - A `name` field, used to identify the Serializer. Serializers are + compared for equality by comparing their names. + +The serializer's output should be base64-encoded. + +------------------------------------------------------------------ +`is_comparable`: comparing serialized representations for equality +------------------------------------------------------------------ + +If `is_comparable` is False, the serializer's representations of equal objects +are not required to be equal: + +>>> import pickle +>>> a = {1: 0, 9: 0} +>>> b = {9: 0, 1: 0} +>>> a == b +True +>>> pickle.dumps(a) == pickle.dumps(b) +False + +RDDs with comparable serializers can use native Java implementations of +operations like join() and distinct(), which may lead to better performance by +eliminating deserialization and Python comparisons. + +The default JSONSerializer produces comparable representations of common Python +data structures. + +-------------------------------------- +Examples of serialized representations +-------------------------------------- + +The RDD transformations that use Python UDFs are implemented in terms of +a modified `PipedRDD.pipe()` function. For each record `x` in the RDD, the +`pipe()` function pipes `x.toString()` to a Python worker process, which +deserializes the string into a Python object, executes user-defined functions, +and outputs serialized Python objects. + +The regular `toString()` method returns an ambiguous representation, due to the +way that Scala `Option` instances are printed: + +>>> from context import SparkContext +>>> sc = SparkContext("local", "SerializerDocs") +>>> x = sc.parallelizePairs([("a", 1), ("b", 4)]) +>>> y = sc.parallelizePairs([("a", 2)]) + +>>> print y.rightOuterJoin(x)._jrdd.first().toString() +(ImEi,(Some(Mg==),MQ==)) + +In Java, preprocessing is performed to handle Option instances, so the Python +process receives unambiguous input: + +>>> print sc.python_dump(y.rightOuterJoin(x)._jrdd.first()) +(ImEi,(Mg==,MQ==)) + +The base64-encoding eliminates the need to escape newlines, parentheses and +other special characters. + +---------------------- +Serializer composition +---------------------- + +In order to handle nested structures, which could contain object serialized +with different serializers, the RDD module composes serializers. 
For example, +the serializers in the previous example are: + +>>> print x.serializer.name +PairSerializer + +>>> print y.serializer.name +PairSerializer + +>>> print y.rightOuterJoin(x).serializer.name +PairSerializer, JSONSerializer>> +""" +from base64 import standard_b64encode, standard_b64decode +from collections import namedtuple +import cPickle +import simplejson + + +Serializer = namedtuple("Serializer", + ["dumps","loads", "is_comparable", "name"]) + + +NopSerializer = Serializer(str, str, True, "NopSerializer") + + +JSONSerializer = Serializer( + lambda obj: standard_b64encode(simplejson.dumps(obj, sort_keys=True, + separators=(',', ':'))), + lambda s: simplejson.loads(standard_b64decode(s)), + True, + "JSONSerializer" +) + + +PickleSerializer = Serializer( + lambda obj: standard_b64encode(cPickle.dumps(obj)), + lambda s: cPickle.loads(standard_b64decode(s)), + False, + "PickleSerializer" +) + + +def OptionSerializer(serializer): + """ + >>> ser = OptionSerializer(NopSerializer) + >>> ser.loads(ser.dumps("Hello, World!")) + 'Hello, World!' + >>> ser.loads(ser.dumps(None)) is None + True + """ + none_placeholder = '*' + + def dumps(x): + if x is None: + return none_placeholder + else: + return serializer.dumps(x) + + def loads(x): + if x == none_placeholder: + return None + else: + return serializer.loads(x) + + name = "OptionSerializer<%s>" % serializer.name + return Serializer(dumps, loads, serializer.is_comparable, name) + + +def PairSerializer(keySerializer, valSerializer): + """ + Returns a Serializer for a (key, value) pair. + + >>> ser = PairSerializer(JSONSerializer, JSONSerializer) + >>> ser.loads(ser.dumps((1, 2))) + (1, 2) + + >>> ser = PairSerializer(JSONSerializer, ser) + >>> ser.loads(ser.dumps((1, (2, 3)))) + (1, (2, 3)) + """ + def loads(kv): + try: + (key, val) = kv[1:-1].split(',', 1) + key = keySerializer.loads(key) + val = valSerializer.loads(val) + return (key, val) + except: + print "Error in deserializing pair from '%s'" % str(kv) + raise + + def dumps(kv): + (key, val) = kv + return"(%s,%s)" % (keySerializer.dumps(key), valSerializer.dumps(val)) + is_comparable = \ + keySerializer.is_comparable and valSerializer.is_comparable + name = "PairSerializer<%s, %s>" % (keySerializer.name, valSerializer.name) + return Serializer(dumps, loads, is_comparable, name) + + +def ArraySerializer(serializer): + """ + >>> ser = ArraySerializer(JSONSerializer) + >>> ser.loads(ser.dumps([1, 2, 3, 4])) + [1, 2, 3, 4] + >>> ser = ArraySerializer(PairSerializer(JSONSerializer, PickleSerializer)) + >>> ser.loads(ser.dumps([('a', 1), ('b', 2)])) + [('a', 1), ('b', 2)] + >>> ser.loads(ser.dumps([('a', 1)])) + [('a', 1)] + >>> ser.loads(ser.dumps([])) + [] + """ + def dumps(arr): + if arr == []: + return '[]' + else: + return '[' + '|'.join(serializer.dumps(x) for x in arr) + ']' + + def loads(s): + if s == '[]': + return [] + items = s[1:-1] + if '|' in items: + items = items.split('|') + else: + items = [items] + return [serializer.loads(x) for x in items] + + name = "ArraySerializer<%s>" % serializer.name + return Serializer(dumps, loads, serializer.is_comparable, name) + + +# TODO: IntegerSerializer + + +# TODO: DoubleSerializer + + +def _test(): + import doctest + doctest.testmod() + + +if __name__ == "__main__": + _test() diff --git a/pyspark/pyspark/worker.py b/pyspark/pyspark/worker.py new file mode 100644 index 0000000000..4d4cc939c3 --- /dev/null +++ b/pyspark/pyspark/worker.py @@ -0,0 +1,97 @@ +""" +Worker that receives input from Piped RDD. 
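+
+The protocol, as implemented below: the first line on stdin names a command
+("map", "mapPairs", "combine_by_key", "reduce" or "echo"), followed by any
+base64-pickled functions it needs, followed by one serialized record per
+line. Results are written to the real stdout, one line per record, while
+user print output is redirected to stderr.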
+""" +import sys +from base64 import standard_b64decode +# CloudPickler needs to be imported so that depicklers are registered using the +# copy_reg module. +from cloud.serialization.cloudpickle import CloudPickler +import cPickle + + +# Redirect stdout to stderr so that users must return values from functions. +old_stdout = sys.stdout +sys.stdout = sys.stderr + + +def load_function(): + return cPickle.loads(standard_b64decode(sys.stdin.readline().strip())) + + +def output(x): + for line in x.split("\n"): + old_stdout.write(line.rstrip("\r\n") + "\n") + + +def read_input(): + for line in sys.stdin: + yield line.rstrip("\r\n") + + +def do_combine_by_key(): + create_combiner = load_function() + merge_value = load_function() + merge_combiners = load_function() # TODO: not used. + depickler = load_function() + key_pickler = load_function() + combiner_pickler = load_function() + combiners = {} + for line in read_input(): + # Discard the hashcode added in the Python combineByKey() method. + (key, value) = depickler(line)[1] + if key not in combiners: + combiners[key] = create_combiner(value) + else: + combiners[key] = merge_value(combiners[key], value) + for (key, combiner) in combiners.iteritems(): + output(key_pickler(key)) + output(combiner_pickler(combiner)) + + +def do_map(map_pairs=False): + f = load_function() + for line in read_input(): + try: + out = f(line) + if out is not None: + if map_pairs: + for x in out: + output(x) + else: + output(out) + except: + sys.stderr.write("Error processing line '%s'\n" % line) + raise + + +def do_reduce(): + f = load_function() + dumps = load_function() + acc = None + for line in read_input(): + acc = f(line, acc) + output(dumps(acc)) + + +def do_echo(): + old_stdout.writelines(sys.stdin.readlines()) + + +def main(): + command = sys.stdin.readline().strip() + if command == "map": + do_map(map_pairs=False) + elif command == "mapPairs": + do_map(map_pairs=True) + elif command == "combine_by_key": + do_combine_by_key() + elif command == "reduce": + do_reduce() + elif command == "echo": + do_echo() + else: + raise Exception("Unsupported command %s" % command) + + +if __name__ == '__main__': + main() diff --git a/pyspark/requirements.txt b/pyspark/requirements.txt new file mode 100644 index 0000000000..d9b3fe40bd --- /dev/null +++ b/pyspark/requirements.txt @@ -0,0 +1,9 @@ +# The Python API relies on some new features from the Py4J development branch. +# pip can't install Py4J from git because the setup.py file for the Python +# package is not at the root of the git repository. It may be possible to +# install Py4J from git once https://github.com/pypa/pip/pull/526 is merged. 
+ +# git+git://github.com/bartdag/py4j.git@3dbf380d3d2cdeb9aab394454ea74d80c4aba1ea + +simplejson==2.6.1 +cloud==2.5.5 diff --git a/python/tc.py b/python/tc.py new file mode 100644 index 0000000000..5dcc4317e0 --- /dev/null +++ b/python/tc.py @@ -0,0 +1,22 @@ +from rdd import SparkContext + +sc = SparkContext("local", "PythonWordCount") +e = [(1, 2), (2, 3), (4, 1)] + +tc = sc.parallelizePairs(e) + +edges = tc.mapPairs(lambda (x, y): (y, x)) + +oldCount = 0 +nextCount = tc.count() + +def project(x): + return (x[1][1], x[1][0]) + +while nextCount != oldCount: + oldCount = nextCount + tc = tc.union(tc.join(edges).mapPairs(project)).distinct() + nextCount = tc.count() + +print "TC has %i edges" % tc.count() +print tc.collect() From 13b9514966a423f80f672f23f42ec3f0113936fd Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sun, 19 Aug 2012 17:12:51 -0700 Subject: [PATCH 002/291] Bundle cloudpickle with pyspark. --- pyspark/pyspark/cloudpickle.py | 974 +++++++++++++++++++++++++++++++++ pyspark/pyspark/rdd.py | 2 +- pyspark/pyspark/worker.py | 2 +- pyspark/requirements.txt | 3 - 4 files changed, 976 insertions(+), 5 deletions(-) create mode 100644 pyspark/pyspark/cloudpickle.py diff --git a/pyspark/pyspark/cloudpickle.py b/pyspark/pyspark/cloudpickle.py new file mode 100644 index 0000000000..6a7c23a069 --- /dev/null +++ b/pyspark/pyspark/cloudpickle.py @@ -0,0 +1,974 @@ +""" +This class is defined to override standard pickle functionality + +The goals of it follow: +-Serialize lambdas and nested functions to compiled byte code +-Deal with main module correctly +-Deal with other non-serializable objects + +It does not include an unpickler, as standard python unpickling suffices. + +This module was extracted from the `cloud` package, developed by `PiCloud, Inc. +`_. + +Copyright (c) 2012, Regents of the University of California. +Copyright (c) 2009 `PiCloud, Inc. `_. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the University of California, Berkeley nor the + names of its contributors may be used to endorse or promote + products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+""" + + +import operator +import os +import pickle +import struct +import sys +import types +from functools import partial +import itertools +from copy_reg import _extension_registry, _inverted_registry, _extension_cache +import new +import dis +import traceback + +#relevant opcodes +STORE_GLOBAL = chr(dis.opname.index('STORE_GLOBAL')) +DELETE_GLOBAL = chr(dis.opname.index('DELETE_GLOBAL')) +LOAD_GLOBAL = chr(dis.opname.index('LOAD_GLOBAL')) +GLOBAL_OPS = [STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL] + +HAVE_ARGUMENT = chr(dis.HAVE_ARGUMENT) +EXTENDED_ARG = chr(dis.EXTENDED_ARG) + +import logging +cloudLog = logging.getLogger("Cloud.Transport") + +try: + import ctypes +except (MemoryError, ImportError): + logging.warning('Exception raised on importing ctypes. Likely python bug.. some functionality will be disabled', exc_info = True) + ctypes = None + PyObject_HEAD = None +else: + + # for reading internal structures + PyObject_HEAD = [ + ('ob_refcnt', ctypes.c_size_t), + ('ob_type', ctypes.c_void_p), + ] + + +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +# These helper functions were copied from PiCloud's util module. +def islambda(func): + return getattr(func,'func_name') == '' + +def xrange_params(xrangeobj): + """Returns a 3 element tuple describing the xrange start, step, and len + respectively + + Note: Only guarentees that elements of xrange are the same. parameters may + be different. + e.g. xrange(1,1) is interpretted as xrange(0,0); both behave the same + though w/ iteration + """ + + xrange_len = len(xrangeobj) + if not xrange_len: #empty + return (0,1,0) + start = xrangeobj[0] + if xrange_len == 1: #one element + return start, 1, 1 + return (start, xrangeobj[1] - xrangeobj[0], xrange_len) + +#debug variables intended for developer use: +printSerialization = False +printMemoization = False + +useForcedImports = True #Should I use forced imports for tracking? + + + +class CloudPickler(pickle.Pickler): + + dispatch = pickle.Pickler.dispatch.copy() + savedForceImports = False + savedDjangoEnv = False #hack tro transport django environment + + def __init__(self, file, protocol=None, min_size_to_save= 0): + pickle.Pickler.__init__(self,file,protocol) + self.modules = set() #set of modules needed to depickle + self.globals_ref = {} # map ids to dictionary. used to ensure that functions can share global env + + def dump(self, obj): + # note: not thread safe + # minimal side-effects, so not fixing + recurse_limit = 3000 + base_recurse = sys.getrecursionlimit() + if base_recurse < recurse_limit: + sys.setrecursionlimit(recurse_limit) + self.inject_addons() + try: + return pickle.Pickler.dump(self, obj) + except RuntimeError, e: + if 'recursion' in e.args[0]: + msg = """Could not pickle object as excessively deep recursion required. + Try _fast_serialization=2 or contact PiCloud support""" + raise pickle.PicklingError(msg) + finally: + new_recurse = sys.getrecursionlimit() + if new_recurse == recurse_limit: + sys.setrecursionlimit(base_recurse) + + def save_buffer(self, obj): + """Fallback to save_string""" + pickle.Pickler.save_string(self,str(obj)) + dispatch[buffer] = save_buffer + + #block broken objects + def save_unsupported(self, obj, pack=None): + raise pickle.PicklingError("Cannot pickle objects of type %s" % type(obj)) + dispatch[types.GeneratorType] = save_unsupported + + #python2.6+ supports slice pickling. some py2.5 extensions might as well. 
We just test it + try: + slice(0,1).__reduce__() + except TypeError: #can't pickle - + dispatch[slice] = save_unsupported + + #itertools objects do not pickle! + for v in itertools.__dict__.values(): + if type(v) is type: + dispatch[v] = save_unsupported + + + def save_dict(self, obj): + """hack fix + If the dict is a global, deal with it in a special way + """ + #print 'saving', obj + if obj is __builtins__: + self.save_reduce(_get_module_builtins, (), obj=obj) + else: + pickle.Pickler.save_dict(self, obj) + dispatch[pickle.DictionaryType] = save_dict + + + def save_module(self, obj, pack=struct.pack): + """ + Save a module as an import + """ + #print 'try save import', obj.__name__ + self.modules.add(obj) + self.save_reduce(subimport,(obj.__name__,), obj=obj) + dispatch[types.ModuleType] = save_module #new type + + def save_codeobject(self, obj, pack=struct.pack): + """ + Save a code object + """ + #print 'try to save codeobj: ', obj + args = ( + obj.co_argcount, obj.co_nlocals, obj.co_stacksize, obj.co_flags, obj.co_code, + obj.co_consts, obj.co_names, obj.co_varnames, obj.co_filename, obj.co_name, + obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, obj.co_cellvars + ) + self.save_reduce(types.CodeType, args, obj=obj) + dispatch[types.CodeType] = save_codeobject #new type + + def save_function(self, obj, name=None, pack=struct.pack): + """ Registered with the dispatch to handle all function types. + + Determines what kind of function obj is (e.g. lambda, defined at + interactive prompt, etc) and handles the pickling appropriately. + """ + write = self.write + + name = obj.__name__ + modname = pickle.whichmodule(obj, name) + #print 'which gives %s %s %s' % (modname, obj, name) + try: + themodule = sys.modules[modname] + except KeyError: # eval'd items such as namedtuple give invalid items for their function __module__ + modname = '__main__' + + if modname == '__main__': + themodule = None + + if themodule: + self.modules.add(themodule) + + if not self.savedDjangoEnv: + #hack for django - if we detect the settings module, we transport it + django_settings = os.environ.get('DJANGO_SETTINGS_MODULE', '') + if django_settings: + django_mod = sys.modules.get(django_settings) + if django_mod: + cloudLog.debug('Transporting django settings %s during save of %s', django_mod, name) + self.savedDjangoEnv = True + self.modules.add(django_mod) + write(pickle.MARK) + self.save_reduce(django_settings_load, (django_mod.__name__,), obj=django_mod) + write(pickle.POP_MARK) + + + # if func is lambda, def'ed at prompt, is in main, or is nested, then + # we'll pickle the actual function object rather than simply saving a + # reference (as is done in default pickler), via save_function_tuple. 
+ if islambda(obj) or obj.func_code.co_filename == '' or themodule == None: + #Force server to import modules that have been imported in main + modList = None + if themodule == None and not self.savedForceImports: + mainmod = sys.modules['__main__'] + if useForcedImports and hasattr(mainmod,'___pyc_forcedImports__'): + modList = list(mainmod.___pyc_forcedImports__) + self.savedForceImports = True + self.save_function_tuple(obj, modList) + return + else: # func is nested + klass = getattr(themodule, name, None) + if klass is None or klass is not obj: + self.save_function_tuple(obj, [themodule]) + return + + if obj.__dict__: + # essentially save_reduce, but workaround needed to avoid recursion + self.save(_restore_attr) + write(pickle.MARK + pickle.GLOBAL + modname + '\n' + name + '\n') + self.memoize(obj) + self.save(obj.__dict__) + write(pickle.TUPLE + pickle.REDUCE) + else: + write(pickle.GLOBAL + modname + '\n' + name + '\n') + self.memoize(obj) + dispatch[types.FunctionType] = save_function + + def save_function_tuple(self, func, forced_imports): + """ Pickles an actual func object. + + A func comprises: code, globals, defaults, closure, and dict. We + extract and save these, injecting reducing functions at certain points + to recreate the func object. Keep in mind that some of these pieces + can contain a ref to the func itself. Thus, a naive save on these + pieces could trigger an infinite loop of save's. To get around that, + we first create a skeleton func object using just the code (this is + safe, since this won't contain a ref to the func), and memoize it as + soon as it's created. The other stuff can then be filled in later. + """ + save = self.save + write = self.write + + # save the modules (if any) + if forced_imports: + write(pickle.MARK) + save(_modules_to_main) + #print 'forced imports are', forced_imports + + forced_names = map(lambda m: m.__name__, forced_imports) + save((forced_names,)) + + #save((forced_imports,)) + write(pickle.REDUCE) + write(pickle.POP_MARK) + + code, f_globals, defaults, closure, dct, base_globals = self.extract_func_data(func) + + save(_fill_function) # skeleton function updater + write(pickle.MARK) # beginning of tuple that _fill_function expects + + # create a skeleton function object and memoize it + save(_make_skel_func) + save((code, len(closure), base_globals)) + write(pickle.REDUCE) + self.memoize(func) + + # save the rest of the func data needed by _fill_function + save(f_globals) + save(defaults) + save(closure) + save(dct) + write(pickle.TUPLE) + write(pickle.REDUCE) # applies _fill_function on the tuple + + @staticmethod + def extract_code_globals(co): + """ + Find all globals names read or written to by codeblock co + """ + code = co.co_code + names = co.co_names + out_names = set() + + n = len(code) + i = 0 + extended_arg = 0 + while i < n: + op = code[i] + + i = i+1 + if op >= HAVE_ARGUMENT: + oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg + extended_arg = 0 + i = i+2 + if op == EXTENDED_ARG: + extended_arg = oparg*65536L + if op in GLOBAL_OPS: + out_names.add(names[oparg]) + #print 'extracted', out_names, ' from ', names + return out_names + + def extract_func_data(self, func): + """ + Turn the function into a tuple of data necessary to recreate it: + code, globals, defaults, closure, dict + """ + code = func.func_code + + # extract all global ref's + func_global_refs = CloudPickler.extract_code_globals(code) + if code.co_consts: # see if nested function have any global refs + for const in code.co_consts: + if type(const) 
is types.CodeType and const.co_names: + func_global_refs = func_global_refs.union( CloudPickler.extract_code_globals(const)) + # process all variables referenced by global environment + f_globals = {} + for var in func_global_refs: + #Some names, such as class functions are not global - we don't need them + if func.func_globals.has_key(var): + f_globals[var] = func.func_globals[var] + + # defaults requires no processing + defaults = func.func_defaults + + def get_contents(cell): + try: + return cell.cell_contents + except ValueError, e: #cell is empty error on not yet assigned + raise pickle.PicklingError('Function to be pickled has free variables that are referenced before assignment in enclosing scope') + + + # process closure + if func.func_closure: + closure = map(get_contents, func.func_closure) + else: + closure = [] + + # save the dict + dct = func.func_dict + + if printSerialization: + outvars = ['code: ' + str(code) ] + outvars.append('globals: ' + str(f_globals)) + outvars.append('defaults: ' + str(defaults)) + outvars.append('closure: ' + str(closure)) + print 'function ', func, 'is extracted to: ', ', '.join(outvars) + + base_globals = self.globals_ref.get(id(func.func_globals), {}) + self.globals_ref[id(func.func_globals)] = base_globals + + return (code, f_globals, defaults, closure, dct, base_globals) + + def save_global(self, obj, name=None, pack=struct.pack): + write = self.write + memo = self.memo + + if name is None: + name = obj.__name__ + + modname = getattr(obj, "__module__", None) + if modname is None: + modname = pickle.whichmodule(obj, name) + + try: + __import__(modname) + themodule = sys.modules[modname] + except (ImportError, KeyError, AttributeError): #should never occur + raise pickle.PicklingError( + "Can't pickle %r: Module %s cannot be found" % + (obj, modname)) + + if modname == '__main__': + themodule = None + + if themodule: + self.modules.add(themodule) + + sendRef = True + typ = type(obj) + #print 'saving', obj, typ + try: + try: #Deal with case when getattribute fails with exceptions + klass = getattr(themodule, name) + except (AttributeError): + if modname == '__builtin__': #new.* are misrepeported + modname = 'new' + __import__(modname) + themodule = sys.modules[modname] + try: + klass = getattr(themodule, name) + except AttributeError, a: + #print themodule, name, obj, type(obj) + raise pickle.PicklingError("Can't pickle builtin %s" % obj) + else: + raise + + except (ImportError, KeyError, AttributeError): + if typ == types.TypeType or typ == types.ClassType: + sendRef = False + else: #we can't deal with this + raise + else: + if klass is not obj and (typ == types.TypeType or typ == types.ClassType): + sendRef = False + if not sendRef: + #note: Third party types might crash this - add better checks! 
+ d = dict(obj.__dict__) #copy dict proxy to a dict + if not isinstance(d.get('__dict__', None), property): # don't extract dict that are properties + d.pop('__dict__',None) + d.pop('__weakref__',None) + + # hack as __new__ is stored differently in the __dict__ + new_override = d.get('__new__', None) + if new_override: + d['__new__'] = obj.__new__ + + self.save_reduce(type(obj),(obj.__name__,obj.__bases__, + d),obj=obj) + #print 'internal reduce dask %s %s' % (obj, d) + return + + if self.proto >= 2: + code = _extension_registry.get((modname, name)) + if code: + assert code > 0 + if code <= 0xff: + write(pickle.EXT1 + chr(code)) + elif code <= 0xffff: + write("%c%c%c" % (pickle.EXT2, code&0xff, code>>8)) + else: + write(pickle.EXT4 + pack("= 2 and getattr(func, "__name__", "") == "__newobj__": + #Added fix to allow transient + cls = args[0] + if not hasattr(cls, "__new__"): + raise pickle.PicklingError( + "args[0] from __newobj__ args has no __new__") + if obj is not None and cls is not obj.__class__: + raise pickle.PicklingError( + "args[0] from __newobj__ args has the wrong class") + args = args[1:] + save(cls) + + #Don't pickle transient entries + if hasattr(obj, '__transient__'): + transient = obj.__transient__ + state = state.copy() + + for k in list(state.keys()): + if k in transient: + del state[k] + + save(args) + write(pickle.NEWOBJ) + else: + save(func) + save(args) + write(pickle.REDUCE) + + if obj is not None: + self.memoize(obj) + + # More new special cases (that work with older protocols as + # well): when __reduce__ returns a tuple with 4 or 5 items, + # the 4th and 5th item should be iterators that provide list + # items and dict items (as (key, value) tuples), or None. + + if listitems is not None: + self._batch_appends(listitems) + + if dictitems is not None: + self._batch_setitems(dictitems) + + if state is not None: + #print 'obj %s has state %s' % (obj, state) + save(state) + write(pickle.BUILD) + + + def save_xrange(self, obj): + """Save an xrange object in python 2.5 + Python 2.6 supports this natively + """ + range_params = xrange_params(obj) + self.save_reduce(_build_xrange,range_params) + + #python2.6+ supports xrange pickling. some py2.5 extensions might as well. 
We just test it + try: + xrange(0).__reduce__() + except TypeError: #can't pickle -- use PiCloud pickler + dispatch[xrange] = save_xrange + + def save_partial(self, obj): + """Partial objects do not serialize correctly in python2.x -- this fixes the bugs""" + self.save_reduce(_genpartial, (obj.func, obj.args, obj.keywords)) + + if sys.version_info < (2,7): #2.7 supports partial pickling + dispatch[partial] = save_partial + + + def save_file(self, obj): + """Save a file""" + import StringIO as pystringIO #we can't use cStringIO as it lacks the name attribute + from ..transport.adapter import SerializingAdapter + + if not hasattr(obj, 'name') or not hasattr(obj, 'mode'): + raise pickle.PicklingError("Cannot pickle files that do not map to an actual file") + if obj.name == '': + return self.save_reduce(getattr, (sys,'stdout'), obj=obj) + if obj.name == '': + return self.save_reduce(getattr, (sys,'stderr'), obj=obj) + if obj.name == '': + raise pickle.PicklingError("Cannot pickle standard input") + if hasattr(obj, 'isatty') and obj.isatty(): + raise pickle.PicklingError("Cannot pickle files that map to tty objects") + if 'r' not in obj.mode: + raise pickle.PicklingError("Cannot pickle files that are not opened for reading") + name = obj.name + try: + fsize = os.stat(name).st_size + except OSError: + raise pickle.PicklingError("Cannot pickle file %s as it cannot be stat" % name) + + if obj.closed: + #create an empty closed string io + retval = pystringIO.StringIO("") + retval.close() + elif not fsize: #empty file + retval = pystringIO.StringIO("") + try: + tmpfile = file(name) + tst = tmpfile.read(1) + except IOError: + raise pickle.PicklingError("Cannot pickle file %s as it cannot be read" % name) + tmpfile.close() + if tst != '': + raise pickle.PicklingError("Cannot pickle file %s as it does not appear to map to a physical, real file" % name) + elif fsize > SerializingAdapter.max_transmit_data: + raise pickle.PicklingError("Cannot pickle file %s as it exceeds cloudconf.py's max_transmit_data of %d" % + (name,SerializingAdapter.max_transmit_data)) + else: + try: + tmpfile = file(name) + contents = tmpfile.read(SerializingAdapter.max_transmit_data) + tmpfile.close() + except IOError: + raise pickle.PicklingError("Cannot pickle file %s as it cannot be read" % name) + retval = pystringIO.StringIO(contents) + curloc = obj.tell() + retval.seek(curloc) + + retval.name = name + self.save(retval) #save stringIO + self.memoize(obj) + + dispatch[file] = save_file + """Special functions for Add-on libraries""" + + def inject_numpy(self): + numpy = sys.modules.get('numpy') + if not numpy or not hasattr(numpy, 'ufunc'): + return + self.dispatch[numpy.ufunc] = self.__class__.save_ufunc + + numpy_tst_mods = ['numpy', 'scipy.special'] + def save_ufunc(self, obj): + """Hack function for saving numpy ufunc objects""" + name = obj.__name__ + for tst_mod_name in self.numpy_tst_mods: + tst_mod = sys.modules.get(tst_mod_name, None) + if tst_mod: + if name in tst_mod.__dict__: + self.save_reduce(_getobject, (tst_mod_name, name)) + return + raise pickle.PicklingError('cannot save %s. 
Cannot resolve what module it is defined in' % str(obj)) + + def inject_timeseries(self): + """Handle bugs with pickling scikits timeseries""" + tseries = sys.modules.get('scikits.timeseries.tseries') + if not tseries or not hasattr(tseries, 'Timeseries'): + return + self.dispatch[tseries.Timeseries] = self.__class__.save_timeseries + + def save_timeseries(self, obj): + import scikits.timeseries.tseries as ts + + func, reduce_args, state = obj.__reduce__() + if func != ts._tsreconstruct: + raise pickle.PicklingError('timeseries using unexpected reconstruction function %s' % str(func)) + state = (1, + obj.shape, + obj.dtype, + obj.flags.fnc, + obj._data.tostring(), + ts.getmaskarray(obj).tostring(), + obj._fill_value, + obj._dates.shape, + obj._dates.__array__().tostring(), + obj._dates.dtype, #added -- preserve type + obj.freq, + obj._optinfo, + ) + return self.save_reduce(_genTimeSeries, (reduce_args, state)) + + def inject_email(self): + """Block email LazyImporters from being saved""" + email = sys.modules.get('email') + if not email: + return + self.dispatch[email.LazyImporter] = self.__class__.save_unsupported + + def inject_addons(self): + """Plug in system. Register additional pickling functions if modules already loaded""" + self.inject_numpy() + self.inject_timeseries() + self.inject_email() + + """Python Imaging Library""" + def save_image(self, obj): + if not obj.im and obj.fp and 'r' in obj.fp.mode and obj.fp.name \ + and not obj.fp.closed and (not hasattr(obj, 'isatty') or not obj.isatty()): + #if image not loaded yet -- lazy load + self.save_reduce(_lazyloadImage,(obj.fp,), obj=obj) + else: + #image is loaded - just transmit it over + self.save_reduce(_generateImage, (obj.size, obj.mode, obj.tostring()), obj=obj) + + """ + def memoize(self, obj): + pickle.Pickler.memoize(self, obj) + if printMemoization: + print 'memoizing ' + str(obj) + """ + + + +# Shorthands for legacy support + +def dump(obj, file, protocol=2): + CloudPickler(file, protocol).dump(obj) + +def dumps(obj, protocol=2): + file = StringIO() + + cp = CloudPickler(file,protocol) + cp.dump(obj) + + #print 'cloud dumped', str(obj), str(cp.modules) + + return file.getvalue() + + +#hack for __import__ not working as desired +def subimport(name): + __import__(name) + return sys.modules[name] + +#hack to load django settings: +def django_settings_load(name): + modified_env = False + + if 'DJANGO_SETTINGS_MODULE' not in os.environ: + os.environ['DJANGO_SETTINGS_MODULE'] = name # must set name first due to circular deps + modified_env = True + try: + module = subimport(name) + except Exception, i: + print >> sys.stderr, 'Cloud not import django settings %s:' % (name) + print_exec(sys.stderr) + if modified_env: + del os.environ['DJANGO_SETTINGS_MODULE'] + else: + #add project directory to sys,path: + if hasattr(module,'__file__'): + dirname = os.path.split(module.__file__)[0] + '/' + sys.path.append(dirname) + +# restores function attributes +def _restore_attr(obj, attr): + for key, val in attr.items(): + setattr(obj, key, val) + return obj + +def _get_module_builtins(): + return pickle.__builtins__ + +def print_exec(stream): + ei = sys.exc_info() + traceback.print_exception(ei[0], ei[1], ei[2], None, stream) + +def _modules_to_main(modList): + """Force every module in modList to be placed into main""" + if not modList: + return + + main = sys.modules['__main__'] + for modname in modList: + if type(modname) is str: + try: + mod = __import__(modname) + except Exception, i: #catch all... 
+ sys.stderr.write('warning: could not import %s\n. Your function may unexpectedly error due to this import failing; \ +A version mismatch is likely. Specific error was:\n' % modname) + print_exec(sys.stderr) + else: + setattr(main,mod.__name__, mod) + else: + #REVERSE COMPATIBILITY FOR CLOUD CLIENT 1.5 (WITH EPD) + #In old version actual module was sent + setattr(main,modname.__name__, modname) + +#object generators: +def _build_xrange(start, step, len): + """Built xrange explicitly""" + return xrange(start, start + step*len, step) + +def _genpartial(func, args, kwds): + if not args: + args = () + if not kwds: + kwds = {} + return partial(func, *args, **kwds) + + +def _fill_function(func, globals, defaults, closure, dict): + """ Fills in the rest of function data into the skeleton function object + that were created via _make_skel_func(). + """ + func.func_globals.update(globals) + func.func_defaults = defaults + func.func_dict = dict + + if len(closure) != len(func.func_closure): + raise pickle.UnpicklingError("closure lengths don't match up") + for i in range(len(closure)): + _change_cell_value(func.func_closure[i], closure[i]) + + return func + +def _make_skel_func(code, num_closures, base_globals = None): + """ Creates a skeleton function object that contains just the provided + code and the correct number of cells in func_closure. All other + func attributes (e.g. func_globals) are empty. + """ + #build closure (cells): + if not ctypes: + raise Exception('ctypes failed to import; cannot build function') + + cellnew = ctypes.pythonapi.PyCell_New + cellnew.restype = ctypes.py_object + cellnew.argtypes = (ctypes.py_object,) + dummy_closure = tuple(map(lambda i: cellnew(None), range(num_closures))) + + if base_globals is None: + base_globals = {} + base_globals['__builtins__'] = __builtins__ + + return types.FunctionType(code, base_globals, + None, None, dummy_closure) + +# this piece of opaque code is needed below to modify 'cell' contents +cell_changer_code = new.code( + 1, 1, 2, 0, + ''.join([ + chr(dis.opmap['LOAD_FAST']), '\x00\x00', + chr(dis.opmap['DUP_TOP']), + chr(dis.opmap['STORE_DEREF']), '\x00\x00', + chr(dis.opmap['RETURN_VALUE']) + ]), + (), (), ('newval',), '', 'cell_changer', 1, '', ('c',), () +) + +def _change_cell_value(cell, newval): + """ Changes the contents of 'cell' object to newval """ + return new.function(cell_changer_code, {}, None, (), (cell,))(newval) + +"""Constructors for 3rd party libraries +Note: These can never be renamed due to client compatibility issues""" + +def _getobject(modname, attribute): + mod = __import__(modname) + return mod.__dict__[attribute] + +def _generateImage(size, mode, str_rep): + """Generate image from string representation""" + import Image + i = Image.new(mode, size) + i.fromstring(str_rep) + return i + +def _lazyloadImage(fp): + import Image + fp.seek(0) #works in almost any case + return Image.open(fp) + +"""Timeseries""" +def _genTimeSeries(reduce_args, state): + import scikits.timeseries.tseries as ts + from numpy import ndarray + from numpy.ma import MaskedArray + + + time_series = ts._tsreconstruct(*reduce_args) + + #from setstate modified + (ver, shp, typ, isf, raw, msk, flv, dsh, dtm, dtyp, frq, infodict) = state + #print 'regenerating %s' % dtyp + + MaskedArray.__setstate__(time_series, (ver, shp, typ, isf, raw, msk, flv)) + _dates = time_series._dates + #_dates.__setstate__((ver, dsh, typ, isf, dtm, frq)) #use remote typ + ndarray.__setstate__(_dates,(dsh,dtyp, isf, dtm)) + _dates.freq = frq + 
_dates._cachedinfo.update(dict(full=None, hasdups=None, steps=None, + toobj=None, toord=None, tostr=None)) + # Update the _optinfo dictionary + time_series._optinfo.update(infodict) + return time_series + diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index c892e86b93..5579c56de3 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -1,5 +1,5 @@ from base64 import standard_b64encode as b64enc -from cloud.serialization import cloudpickle +from pyspark import cloudpickle from itertools import chain from pyspark.serializers import PairSerializer, NopSerializer, \ diff --git a/pyspark/pyspark/worker.py b/pyspark/pyspark/worker.py index 4d4cc939c3..4c4b02fce4 100644 --- a/pyspark/pyspark/worker.py +++ b/pyspark/pyspark/worker.py @@ -5,7 +5,7 @@ import sys from base64 import standard_b64decode # CloudPickler needs to be imported so that depicklers are registered using the # copy_reg module. -from cloud.serialization.cloudpickle import CloudPickler +from pyspark.cloudpickle import CloudPickler import cPickle diff --git a/pyspark/requirements.txt b/pyspark/requirements.txt index d9b3fe40bd..71e2bc2b89 100644 --- a/pyspark/requirements.txt +++ b/pyspark/requirements.txt @@ -4,6 +4,3 @@ # install Py4J from git once https://github.com/pypa/pip/pull/526 is merged. # git+git://github.com/bartdag/py4j.git@3dbf380d3d2cdeb9aab394454ea74d80c4aba1ea - -simplejson==2.6.1 -cloud==2.5.5 From fd94e5443c99775bfad1928729f5075c900ad0f9 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 18 Aug 2012 16:07:10 -0700 Subject: [PATCH 003/291] Use only cPickle for serialization in Python API. Objects serialized with JSON can be compared for equality, but JSON can be slow to serialize and only supports a limited range of data types. --- .../scala/spark/api/python/PythonRDD.scala | 192 ++++++-- pyspark/pyspark/context.py | 53 +-- pyspark/pyspark/java_gateway.py | 1 - pyspark/pyspark/join.py | 32 +- pyspark/pyspark/rdd.py | 422 +++++++----------- pyspark/pyspark/serializers.py | 233 +--------- pyspark/pyspark/worker.py | 64 +-- 7 files changed, 387 insertions(+), 610 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 660ad48afe..b9a0168d18 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -1,22 +1,26 @@ package spark.api.python -import java.io.PrintWriter +import java.io._ import scala.collection.Map import scala.collection.JavaConversions._ import scala.io.Source import spark._ -import api.java.{JavaPairRDD, JavaRDD} +import api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} +import scala.{collection, Some} +import collection.parallel.mutable +import scala.collection import scala.Some trait PythonRDDBase { def compute[T](split: Split, envVars: Map[String, String], - command: Seq[String], parent: RDD[T], pythonExec: String): Iterator[String]= { - val currentEnvVars = new ProcessBuilder().environment() - val SPARK_HOME = currentEnvVars.get("SPARK_HOME") + command: Seq[String], parent: RDD[T], pythonExec: String): Iterator[Array[Byte]] = { + val SPARK_HOME = new ProcessBuilder().environment().get("SPARK_HOME") val pb = new ProcessBuilder(Seq(pythonExec, SPARK_HOME + "/pyspark/pyspark/worker.py")) // Add the environmental variables to the process. 
+ val currentEnvVars = pb.environment() + envVars.foreach { case (variable, value) => currentEnvVars.put(variable, value) } @@ -41,33 +45,70 @@ trait PythonRDDBase { for (elem <- command) { out.println(elem) } + out.flush() + val dOut = new DataOutputStream(proc.getOutputStream) for (elem <- parent.iterator(split)) { - out.println(PythonRDD.pythonDump(elem)) + if (elem.isInstanceOf[Array[Byte]]) { + val arr = elem.asInstanceOf[Array[Byte]] + dOut.writeInt(arr.length) + dOut.write(arr) + } else if (elem.isInstanceOf[scala.Tuple2[_, _]]) { + val t = elem.asInstanceOf[scala.Tuple2[_, _]] + val t1 = t._1.asInstanceOf[Array[Byte]] + val t2 = t._2.asInstanceOf[Array[Byte]] + val length = t1.length + t2.length - 3 - 3 + 4 // stripPickle() removes 3 bytes + dOut.writeInt(length) + dOut.writeByte(Pickle.PROTO) + dOut.writeByte(Pickle.TWO) + dOut.write(PythonRDD.stripPickle(t1)) + dOut.write(PythonRDD.stripPickle(t2)) + dOut.writeByte(Pickle.TUPLE2) + dOut.writeByte(Pickle.STOP) + } else if (elem.isInstanceOf[String]) { + // For uniformity, strings are wrapped into Pickles. + val s = elem.asInstanceOf[String].getBytes("UTF-8") + val length = 2 + 1 + 4 + s.length + 1 + dOut.writeInt(length) + dOut.writeByte(Pickle.PROTO) + dOut.writeByte(Pickle.TWO) + dOut.writeByte(Pickle.BINUNICODE) + dOut.writeInt(Integer.reverseBytes(s.length)) + dOut.write(s) + dOut.writeByte(Pickle.STOP) + } else { + throw new Exception("Unexpected RDD type") + } } - out.close() + dOut.flush() + out.flush() + proc.getOutputStream.close() } }.start() // Return an iterator that read lines from the process's stdout - val lines: Iterator[String] = Source.fromInputStream(proc.getInputStream).getLines - wrapIterator(lines, proc) - } + val stream = new DataInputStream(proc.getInputStream) + return new Iterator[Array[Byte]] { + def next() = { + val obj = _nextObj + _nextObj = read() + obj + } - def wrapIterator[T](iter: Iterator[T], proc: Process): Iterator[T] = { - return new Iterator[T] { - def next() = iter.next() - - def hasNext = { - if (iter.hasNext) { - true - } else { - val exitStatus = proc.waitFor() - if (exitStatus != 0) { - throw new Exception("Subprocess exited with status " + exitStatus) - } - false + private def read() = { + try { + val length = stream.readInt() + val obj = new Array[Byte](length) + stream.readFully(obj) + obj + } catch { + case eof: EOFException => { new Array[Byte](0) } + case e => throw e } } + + var _nextObj = read() + + def hasNext = _nextObj.length != 0 } } } @@ -75,7 +116,7 @@ trait PythonRDDBase { class PythonRDD[T: ClassManifest]( parent: RDD[T], command: Seq[String], envVars: Map[String, String], preservePartitoning: Boolean, pythonExec: String) - extends RDD[String](parent.context) with PythonRDDBase { + extends RDD[Array[Byte]](parent.context) with PythonRDDBase { def this(parent: RDD[T], command: Seq[String], preservePartitoning: Boolean, pythonExec: String) = this(parent, command, Map(), preservePartitoning, pythonExec) @@ -91,16 +132,16 @@ class PythonRDD[T: ClassManifest]( override val partitioner = if (preservePartitoning) parent.partitioner else None - override def compute(split: Split): Iterator[String] = + override def compute(split: Split): Iterator[Array[Byte]] = compute(split, envVars, command, parent, pythonExec) - val asJavaRDD : JavaRDD[String] = JavaRDD.fromRDD(this) + val asJavaRDD : JavaRDD[Array[Byte]] = JavaRDD.fromRDD(this) } class PythonPairRDD[T: ClassManifest] ( parent: RDD[T], command: Seq[String], envVars: Map[String, String], preservePartitoning: Boolean, pythonExec: 
String) - extends RDD[(String, String)](parent.context) with PythonRDDBase { + extends RDD[(Array[Byte], Array[Byte])](parent.context) with PythonRDDBase { def this(parent: RDD[T], command: Seq[String], preservePartitoning: Boolean, pythonExec: String) = this(parent, command, Map(), preservePartitoning, pythonExec) @@ -116,32 +157,95 @@ class PythonPairRDD[T: ClassManifest] ( override val partitioner = if (preservePartitoning) parent.partitioner else None - override def compute(split: Split): Iterator[(String, String)] = { + override def compute(split: Split): Iterator[(Array[Byte], Array[Byte])] = { compute(split, envVars, command, parent, pythonExec).grouped(2).map { case Seq(a, b) => (a, b) - case x => throw new Exception("Unexpected value: " + x) + case x => throw new Exception("PythonPairRDD: unexpected value: " + x) } } - val asJavaPairRDD : JavaPairRDD[String, String] = JavaPairRDD.fromRDD(this) + val asJavaPairRDD : JavaPairRDD[Array[Byte], Array[Byte]] = JavaPairRDD.fromRDD(this) } + object PythonRDD { - def pythonDump[T](x: T): String = { - if (x.isInstanceOf[scala.Option[_]]) { - val t = x.asInstanceOf[scala.Option[_]] - t match { - case None => "*" - case Some(z) => pythonDump(z) - } - } else if (x.isInstanceOf[scala.Tuple2[_, _]]) { - val t = x.asInstanceOf[scala.Tuple2[_, _]] - "(" + pythonDump(t._1) + "," + pythonDump(t._2) + ")" - } else if (x.isInstanceOf[java.util.List[_]]) { - val objs = asScalaBuffer(x.asInstanceOf[java.util.List[_]]).map(pythonDump) - "[" + objs.mkString("|") + "]" + + /** Strips the pickle PROTO and STOP opcodes from the start and end of a pickle */ + def stripPickle(arr: Array[Byte]) : Array[Byte] = { + arr.slice(2, arr.length - 1) + } + + def asPickle(elem: Any) : Array[Byte] = { + val baos = new ByteArrayOutputStream(); + val dOut = new DataOutputStream(baos); + if (elem.isInstanceOf[Array[Byte]]) { + elem.asInstanceOf[Array[Byte]] + } else if (elem.isInstanceOf[scala.Tuple2[_, _]]) { + val t = elem.asInstanceOf[scala.Tuple2[_, _]] + val t1 = t._1.asInstanceOf[Array[Byte]] + val t2 = t._2.asInstanceOf[Array[Byte]] + dOut.writeByte(Pickle.PROTO) + dOut.writeByte(Pickle.TWO) + dOut.write(PythonRDD.stripPickle(t1)) + dOut.write(PythonRDD.stripPickle(t2)) + dOut.writeByte(Pickle.TUPLE2) + dOut.writeByte(Pickle.STOP) + baos.toByteArray() + } else if (elem.isInstanceOf[String]) { + // For uniformity, strings are wrapped into Pickles. 
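The stripPickle and asPickle helpers above splice two already-pickled values into a single pickle by dropping each buffer's two-byte PROTO header and trailing STOP opcode, then appending TUPLE2 and STOP. The byte-level recipe can be checked in plain Python (standalone illustration, not part of the patch; names are arbitrary):

    import pickle

    PROTO, TWO, TUPLE2, STOP = b'\x80', b'\x02', b'\x86', b'.'

    def strip_pickle(p):
        # drop the 2-byte protocol-2 header and the trailing STOP,
        # mirroring PythonRDD.stripPickle
        return p[2:-1]

    key = pickle.dumps(u"a", 2)
    val = pickle.dumps(1, 2)
    pair = PROTO + TWO + strip_pickle(key) + strip_pickle(val) + TUPLE2 + STOP
    assert pickle.loads(pair) == (u"a", 1)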
+ val s = elem.asInstanceOf[String].getBytes("UTF-8") + dOut.writeByte(Pickle.PROTO) + dOut.writeByte(Pickle.TWO) + dOut.write(Pickle.BINUNICODE) + dOut.writeInt(Integer.reverseBytes(s.length)) + dOut.write(s) + dOut.writeByte(Pickle.STOP) + baos.toByteArray() } else { - x.toString + throw new Exception("Unexpected RDD type") } } + + def pickleFile(sc: JavaSparkContext, filename: String, parallelism: Int) : + JavaRDD[Array[Byte]] = { + val file = new DataInputStream(new FileInputStream(filename)) + val objs = new collection.mutable.ArrayBuffer[Array[Byte]] + try { + while (true) { + val length = file.readInt() + val obj = new Array[Byte](length) + file.readFully(obj) + objs.append(obj) + } + } catch { + case eof: EOFException => {} + case e => throw e + } + JavaRDD.fromRDD(sc.sc.parallelize(objs, parallelism)) + } + + def arrayAsPickle(arr : Any) : Array[Byte] = { + val pickles : Array[Byte] = arr.asInstanceOf[Array[Any]].map(asPickle).map(stripPickle).flatten + + Array[Byte](Pickle.PROTO, Pickle.TWO, Pickle.EMPTY_LIST, Pickle.MARK) ++ pickles ++ + Array[Byte] (Pickle.APPENDS, Pickle.STOP) + } +} + +private object Pickle { + def b(x: Int): Byte = x.asInstanceOf[Byte] + val PROTO: Byte = b(0x80) + val TWO: Byte = b(0x02) + val BINUNICODE : Byte = 'X' + val STOP : Byte = '.' + val TUPLE2 : Byte = b(0x86) + val EMPTY_LIST : Byte = ']' + val MARK : Byte = '(' + val APPENDS : Byte = 'e' +} +class ExtractValue extends spark.api.java.function.Function[(Array[Byte], + Array[Byte]), Array[Byte]] { + + override def call(pair: (Array[Byte], Array[Byte])) : Array[Byte] = pair._2 + } diff --git a/pyspark/pyspark/context.py b/pyspark/pyspark/context.py index 587ab12b5f..ac7e4057e9 100644 --- a/pyspark/pyspark/context.py +++ b/pyspark/pyspark/context.py @@ -3,22 +3,24 @@ import atexit from tempfile import NamedTemporaryFile from pyspark.java_gateway import launch_gateway -from pyspark.serializers import JSONSerializer, NopSerializer -from pyspark.rdd import RDD, PairRDD +from pyspark.serializers import PickleSerializer, dumps +from pyspark.rdd import RDD class SparkContext(object): gateway = launch_gateway() jvm = gateway.jvm - python_dump = jvm.spark.api.python.PythonRDD.pythonDump + pickleFile = jvm.spark.api.python.PythonRDD.pickleFile + asPickle = jvm.spark.api.python.PythonRDD.asPickle + arrayAsPickle = jvm.spark.api.python.PythonRDD.arrayAsPickle - def __init__(self, master, name, defaultSerializer=JSONSerializer, - defaultParallelism=None, pythonExec='python'): + + def __init__(self, master, name, defaultParallelism=None, + pythonExec='python'): self.master = master self.name = name self._jsc = self.jvm.JavaSparkContext(master, name) - self.defaultSerializer = defaultSerializer self.defaultParallelism = \ defaultParallelism or self._jsc.sc().defaultParallelism() self.pythonExec = pythonExec @@ -31,39 +33,26 @@ class SparkContext(object): self._jsc.stop() self._jsc = None - def parallelize(self, c, numSlices=None, serializer=None): - serializer = serializer or self.defaultSerializer + def parallelize(self, c, numSlices=None): + """ + >>> sc = SparkContext("local", "test") + >>> rdd = sc.parallelize([(1, 2), (3, 4)]) + >>> rdd.collect() + [(1, 2), (3, 4)] + """ numSlices = numSlices or self.defaultParallelism # Calling the Java parallelize() method with an ArrayList is too slow, # because it sends O(n) Py4J commands. As an alternative, serialized # objects are written to a file and loaded through textFile(). 
tempFile = NamedTemporaryFile(delete=False) - tempFile.writelines(serializer.dumps(x) + '\n' for x in c) + for x in c: + dumps(PickleSerializer.dumps(x), tempFile) tempFile.close() atexit.register(lambda: os.unlink(tempFile.name)) - return self.textFile(tempFile.name, numSlices, serializer) + jrdd = self.pickleFile(self._jsc, tempFile.name, numSlices) + return RDD(jrdd, self) - def parallelizePairs(self, c, numSlices=None, keySerializer=None, - valSerializer=None): - """ - >>> sc = SparkContext("local", "test") - >>> rdd = sc.parallelizePairs([(1, 2), (3, 4)]) - >>> rdd.collect() - [(1, 2), (3, 4)] - """ - keySerializer = keySerializer or self.defaultSerializer - valSerializer = valSerializer or self.defaultSerializer - numSlices = numSlices or self.defaultParallelism - tempFile = NamedTemporaryFile(delete=False) - for (k, v) in c: - tempFile.write(keySerializer.dumps(k).rstrip('\r\n') + '\n') - tempFile.write(valSerializer.dumps(v).rstrip('\r\n') + '\n') - tempFile.close() - atexit.register(lambda: os.unlink(tempFile.name)) - jrdd = self.textFile(tempFile.name, numSlices)._pipePairs([], "echo") - return PairRDD(jrdd, self, keySerializer, valSerializer) - - def textFile(self, name, numSlices=None, serializer=NopSerializer): + def textFile(self, name, numSlices=None): numSlices = numSlices or self.defaultParallelism jrdd = self._jsc.textFile(name, numSlices) - return RDD(jrdd, self, serializer) + return RDD(jrdd, self) diff --git a/pyspark/pyspark/java_gateway.py b/pyspark/pyspark/java_gateway.py index 2df80aee85..bcb405ba72 100644 --- a/pyspark/pyspark/java_gateway.py +++ b/pyspark/pyspark/java_gateway.py @@ -16,5 +16,4 @@ def launch_gateway(): java_import(gateway.jvm, "spark.api.java.*") java_import(gateway.jvm, "spark.api.python.*") java_import(gateway.jvm, "scala.Tuple2") - java_import(gateway.jvm, "spark.api.python.PythonRDD.pythonDump") return gateway diff --git a/pyspark/pyspark/join.py b/pyspark/pyspark/join.py index c67520fce8..7036c47980 100644 --- a/pyspark/pyspark/join.py +++ b/pyspark/pyspark/join.py @@ -30,15 +30,12 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
""" -from pyspark.serializers import PairSerializer, OptionSerializer, \ - ArraySerializer -def _do_python_join(rdd, other, numSplits, dispatch, valSerializer): - vs = rdd.mapPairs(lambda (k, v): (k, (1, v))) - ws = other.mapPairs(lambda (k, v): (k, (2, v))) - return vs.union(ws).groupByKey(numSplits) \ - .flatMapValues(dispatch, valSerializer) +def _do_python_join(rdd, other, numSplits, dispatch): + vs = rdd.map(lambda (k, v): (k, (1, v))) + ws = other.map(lambda (k, v): (k, (2, v))) + return vs.union(ws).groupByKey(numSplits).flatMapValues(dispatch) def python_join(rdd, other, numSplits): @@ -50,8 +47,7 @@ def python_join(rdd, other, numSplits): elif n == 2: wbuf.append(v) return [(v, w) for v in vbuf for w in wbuf] - valSerializer = PairSerializer(rdd.valSerializer, other.valSerializer) - return _do_python_join(rdd, other, numSplits, dispatch, valSerializer) + return _do_python_join(rdd, other, numSplits, dispatch) def python_right_outer_join(rdd, other, numSplits): @@ -65,9 +61,7 @@ def python_right_outer_join(rdd, other, numSplits): if not vbuf: vbuf.append(None) return [(v, w) for v in vbuf for w in wbuf] - valSerializer = PairSerializer(OptionSerializer(rdd.valSerializer), - other.valSerializer) - return _do_python_join(rdd, other, numSplits, dispatch, valSerializer) + return _do_python_join(rdd, other, numSplits, dispatch) def python_left_outer_join(rdd, other, numSplits): @@ -81,17 +75,12 @@ def python_left_outer_join(rdd, other, numSplits): if not wbuf: wbuf.append(None) return [(v, w) for v in vbuf for w in wbuf] - valSerializer = PairSerializer(rdd.valSerializer, - OptionSerializer(other.valSerializer)) - return _do_python_join(rdd, other, numSplits, dispatch, valSerializer) + return _do_python_join(rdd, other, numSplits, dispatch) def python_cogroup(rdd, other, numSplits): - resultValSerializer = PairSerializer( - ArraySerializer(rdd.valSerializer), - ArraySerializer(other.valSerializer)) - vs = rdd.mapPairs(lambda (k, v): (k, (1, v))) - ws = other.mapPairs(lambda (k, v): (k, (2, v))) + vs = rdd.map(lambda (k, v): (k, (1, v))) + ws = other.map(lambda (k, v): (k, (2, v))) def dispatch(seq): vbuf, wbuf = [], [] for (n, v) in seq: @@ -100,5 +89,4 @@ def python_cogroup(rdd, other, numSplits): elif n == 2: wbuf.append(v) return (vbuf, wbuf) - return vs.union(ws).groupByKey(numSplits) \ - .mapValues(dispatch, resultValSerializer) + return vs.union(ws).groupByKey(numSplits).mapValues(dispatch) diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 5579c56de3..8eccddc0a2 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -1,31 +1,17 @@ from base64 import standard_b64encode as b64enc -from pyspark import cloudpickle -from itertools import chain -from pyspark.serializers import PairSerializer, NopSerializer, \ - OptionSerializer, ArraySerializer +from pyspark import cloudpickle +from pyspark.serializers import PickleSerializer from pyspark.join import python_join, python_left_outer_join, \ python_right_outer_join, python_cogroup class RDD(object): - def __init__(self, jrdd, ctx, serializer=None): + def __init__(self, jrdd, ctx): self._jrdd = jrdd self.is_cached = False self.ctx = ctx - self.serializer = serializer or ctx.defaultSerializer - - def _builder(self, jrdd, ctx): - return RDD(jrdd, ctx, self.serializer) - - @property - def id(self): - return self._jrdd.id() - - @property - def splits(self): - return self._jrdd.splits() @classmethod def _get_pipe_command(cls, command, functions): @@ -41,55 +27,18 @@ class RDD(object): self._jrdd.cache() return self 
- def map(self, f, serializer=None, preservesPartitioning=False): - return MappedRDD(self, f, serializer, preservesPartitioning) + def map(self, f, preservesPartitioning=False): + return MappedRDD(self, f, preservesPartitioning) - def mapPairs(self, f, keySerializer=None, valSerializer=None, - preservesPartitioning=False): - return PairMappedRDD(self, f, keySerializer, valSerializer, - preservesPartitioning) - - def flatMap(self, f, serializer=None): + def flatMap(self, f): """ >>> rdd = sc.parallelize([2, 3, 4]) >>> sorted(rdd.flatMap(lambda x: range(1, x)).collect()) [1, 1, 1, 2, 2, 3] - """ - serializer = serializer or self.ctx.defaultSerializer - dumps = serializer.dumps - loads = self.serializer.loads - def func(x): - pickled_elems = (dumps(y) for y in f(loads(x))) - return "\n".join(pickled_elems) or None - pipe_command = RDD._get_pipe_command("map", [func]) - class_manifest = self._jrdd.classManifest() - jrdd = self.ctx.jvm.PythonRDD(self._jrdd.rdd(), pipe_command, - False, self.ctx.pythonExec, - class_manifest).asJavaRDD() - return RDD(jrdd, self.ctx, serializer) - - def flatMapPairs(self, f, keySerializer=None, valSerializer=None, - preservesPartitioning=False): - """ - >>> rdd = sc.parallelize([2, 3, 4]) - >>> sorted(rdd.flatMapPairs(lambda x: [(x, x), (x, x)]).collect()) + >>> sorted(rdd.flatMap(lambda x: [(x, x), (x, x)]).collect()) [(2, 2), (2, 2), (3, 3), (3, 3), (4, 4), (4, 4)] """ - keySerializer = keySerializer or self.ctx.defaultSerializer - valSerializer = valSerializer or self.ctx.defaultSerializer - dumpk = keySerializer.dumps - dumpv = valSerializer.dumps - loads = self.serializer.loads - def func(x): - pairs = f(loads(x)) - pickled_pairs = ((dumpk(k), dumpv(v)) for (k, v) in pairs) - return "\n".join(chain.from_iterable(pickled_pairs)) or None - pipe_command = RDD._get_pipe_command("map", [func]) - class_manifest = self._jrdd.classManifest() - python_rdd = self.ctx.jvm.PythonPairRDD(self._jrdd.rdd(), pipe_command, - preservesPartitioning, self.ctx.pythonExec, class_manifest) - return PairRDD(python_rdd.asJavaPairRDD(), self.ctx, keySerializer, - valSerializer) + return MappedRDD(self, f, preservesPartitioning=False, command='flatmap') def filter(self, f): """ @@ -97,9 +46,8 @@ class RDD(object): >>> rdd.filter(lambda x: x % 2 == 0).collect() [2, 4] """ - loads = self.serializer.loads - def filter_func(x): return x if f(loads(x)) else None - return self._builder(self._pipe(filter_func), self.ctx) + def filter_func(x): return x if f(x) else None + return RDD(self._pipe(filter_func), self.ctx) def _pipe(self, functions, command="map"): class_manifest = self._jrdd.classManifest() @@ -108,32 +56,22 @@ class RDD(object): False, self.ctx.pythonExec, class_manifest) return python_rdd.asJavaRDD() - def _pipePairs(self, functions, command="mapPairs", - preservesPartitioning=False): - class_manifest = self._jrdd.classManifest() - pipe_command = RDD._get_pipe_command(command, functions) - python_rdd = self.ctx.jvm.PythonPairRDD(self._jrdd.rdd(), pipe_command, - preservesPartitioning, self.ctx.pythonExec, class_manifest) - return python_rdd.asJavaPairRDD() - def distinct(self): """ >>> sorted(sc.parallelize([1, 1, 2, 3]).distinct().collect()) [1, 2, 3] """ - if self.serializer.is_comparable: - return self._builder(self._jrdd.distinct(), self.ctx) - return self.mapPairs(lambda x: (x, "")) \ + return self.map(lambda x: (x, "")) \ .reduceByKey(lambda x, _: x) \ .map(lambda (x, _): x) def sample(self, withReplacement, fraction, seed): jrdd = self._jrdd.sample(withReplacement, 
fraction, seed) - return self._builder(jrdd, self.ctx) + return RDD(jrdd, self.ctx) def takeSample(self, withReplacement, num, seed): vals = self._jrdd.takeSample(withReplacement, num, seed) - return [self.serializer.loads(self.ctx.python_dump(x)) for x in vals] + return [PickleSerializer.loads(x) for x in vals] def union(self, other): """ @@ -141,7 +79,7 @@ class RDD(object): >>> rdd.union(rdd).collect() [1, 1, 2, 3, 1, 1, 2, 3] """ - return self._builder(self._jrdd.union(other._jrdd), self.ctx) + return RDD(self._jrdd.union(other._jrdd), self.ctx) # TODO: sort @@ -155,16 +93,17 @@ class RDD(object): >>> sorted(rdd.cartesian(rdd).collect()) [(1, 1), (1, 2), (2, 1), (2, 2)] """ - return PairRDD(self._jrdd.cartesian(other._jrdd), self.ctx) + return RDD(self._jrdd.cartesian(other._jrdd), self.ctx) # numsplits def groupBy(self, f, numSplits=None): """ >>> rdd = sc.parallelize([1, 1, 2, 3, 5, 8]) - >>> sorted(rdd.groupBy(lambda x: x % 2).collect()) + >>> result = rdd.groupBy(lambda x: x % 2).collect() + >>> sorted([(x, sorted(y)) for (x, y) in result]) [(0, [2, 8]), (1, [1, 1, 3, 5])] """ - return self.mapPairs(lambda x: (f(x), x)).groupByKey(numSplits) + return self.map(lambda x: (f(x), x)).groupByKey(numSplits) # TODO: pipe @@ -178,25 +117,19 @@ class RDD(object): self.map(f).collect() # Force evaluation def collect(self): - vals = self._jrdd.collect() - return [self.serializer.loads(self.ctx.python_dump(x)) for x in vals] + pickle = self.ctx.arrayAsPickle(self._jrdd.rdd().collect()) + return PickleSerializer.loads(bytes(pickle)) - def reduce(self, f, serializer=None): + def reduce(self, f): """ - >>> import operator - >>> sc.parallelize([1, 2, 3, 4, 5]).reduce(operator.add) + >>> from operator import add + >>> sc.parallelize([1, 2, 3, 4, 5]).reduce(add) 15 + >>> sc.parallelize((2 for _ in range(10))).map(lambda x: 1).cache().reduce(add) + 10 """ - serializer = serializer or self.ctx.defaultSerializer - loads = self.serializer.loads - dumps = serializer.dumps - def reduceFunction(x, acc): - if acc is None: - return loads(x) - else: - return f(loads(x), acc) - vals = self._pipe([reduceFunction, dumps], command="reduce").collect() - return reduce(f, (serializer.loads(x) for x in vals)) + vals = MappedRDD(self, f, command="reduce", preservesPartitioning=False).collect() + return reduce(f, vals) # TODO: fold @@ -216,36 +149,35 @@ class RDD(object): >>> sc.parallelize([2, 3, 4]).take(2) [2, 3] """ - vals = self._jrdd.take(num) - return [self.serializer.loads(self.ctx.python_dump(x)) for x in vals] + pickle = self.ctx.arrayAsPickle(self._jrdd.rdd().take(num)) + return PickleSerializer.loads(bytes(pickle)) def first(self): """ >>> sc.parallelize([2, 3, 4]).first() 2 """ - return self.serializer.loads(self.ctx.python_dump(self._jrdd.first())) + return PickleSerializer.loads(bytes(self.ctx.asPickle(self._jrdd.first()))) # TODO: saveAsTextFile # TODO: saveAsObjectFile + # Pair functions -class PairRDD(RDD): - - def __init__(self, jrdd, ctx, keySerializer=None, valSerializer=None): - RDD.__init__(self, jrdd, ctx) - self.keySerializer = keySerializer or ctx.defaultSerializer - self.valSerializer = valSerializer or ctx.defaultSerializer - self.serializer = \ - PairSerializer(self.keySerializer, self.valSerializer) - - def _builder(self, jrdd, ctx): - return PairRDD(jrdd, ctx, self.keySerializer, self.valSerializer) + def collectAsMap(self): + """ + >>> m = sc.parallelize([(1, 2), (3, 4)]).collectAsMap() + >>> m[1] + 2 + >>> m[3] + 4 + """ + return dict(self.collect()) def reduceByKey(self, func, 
numSplits=None): """ - >>> x = sc.parallelizePairs([("a", 1), ("b", 1), ("a", 1)]) + >>> x = sc.parallelize([("a", 1), ("b", 1), ("a", 1)]) >>> sorted(x.reduceByKey(lambda a, b: a + b).collect()) [('a', 2), ('b', 1)] """ @@ -259,90 +191,67 @@ class PairRDD(RDD): def join(self, other, numSplits=None): """ - >>> x = sc.parallelizePairs([("a", 1), ("b", 4)]) - >>> y = sc.parallelizePairs([("a", 2), ("a", 3)]) - >>> x.join(y).collect() + >>> x = sc.parallelize([("a", 1), ("b", 4)]) + >>> y = sc.parallelize([("a", 2), ("a", 3)]) + >>> sorted(x.join(y).collect()) [('a', (1, 2)), ('a', (1, 3))] - - Check that we get a PairRDD-like object back: - >>> assert x.join(y).join """ - assert self.keySerializer.name == other.keySerializer.name - if self.keySerializer.is_comparable: - return PairRDD(self._jrdd.join(other._jrdd), - self.ctx, self.keySerializer, - PairSerializer(self.valSerializer, other.valSerializer)) - else: - return python_join(self, other, numSplits) + return python_join(self, other, numSplits) def leftOuterJoin(self, other, numSplits=None): """ - >>> x = sc.parallelizePairs([("a", 1), ("b", 4)]) - >>> y = sc.parallelizePairs([("a", 2)]) + >>> x = sc.parallelize([("a", 1), ("b", 4)]) + >>> y = sc.parallelize([("a", 2)]) >>> sorted(x.leftOuterJoin(y).collect()) [('a', (1, 2)), ('b', (4, None))] """ - assert self.keySerializer.name == other.keySerializer.name - if self.keySerializer.is_comparable: - return PairRDD(self._jrdd.leftOuterJoin(other._jrdd), - self.ctx, self.keySerializer, - PairSerializer(self.valSerializer, - OptionSerializer(other.valSerializer))) - else: - return python_left_outer_join(self, other, numSplits) + return python_left_outer_join(self, other, numSplits) def rightOuterJoin(self, other, numSplits=None): """ - >>> x = sc.parallelizePairs([("a", 1), ("b", 4)]) - >>> y = sc.parallelizePairs([("a", 2)]) + >>> x = sc.parallelize([("a", 1), ("b", 4)]) + >>> y = sc.parallelize([("a", 2)]) >>> sorted(y.rightOuterJoin(x).collect()) [('a', (2, 1)), ('b', (None, 4))] """ - assert self.keySerializer.name == other.keySerializer.name - if self.keySerializer.is_comparable: - return PairRDD(self._jrdd.rightOuterJoin(other._jrdd), - self.ctx, self.keySerializer, - PairSerializer(OptionSerializer(self.valSerializer), - other.valSerializer)) - else: - return python_right_outer_join(self, other, numSplits) + return python_right_outer_join(self, other, numSplits) + + # TODO: pipelining + # TODO: optimizations + def shuffle(self, numSplits): + if numSplits is None: + numSplits = self.ctx.defaultParallelism + pipe_command = RDD._get_pipe_command('shuffle_map_step', []) + class_manifest = self._jrdd.classManifest() + python_rdd = self.ctx.jvm.PythonPairRDD(self._jrdd.rdd(), + pipe_command, False, self.ctx.pythonExec, class_manifest) + partitioner = self.ctx.jvm.spark.HashPartitioner(numSplits) + jrdd = python_rdd.asJavaPairRDD().partitionBy(partitioner) + jrdd = jrdd.map(self.ctx.jvm.ExtractValue()) + # TODO: extract second value. 
+ return RDD(jrdd, self.ctx) + + def combineByKey(self, createCombiner, mergeValue, mergeCombiners, - numSplits=None, serializer=None): + numSplits=None): """ - >>> x = sc.parallelizePairs([("a", 1), ("b", 1), ("a", 1)]) + >>> x = sc.parallelize([("a", 1), ("b", 1), ("a", 1)]) >>> def f(x): return x >>> def add(a, b): return a + str(b) >>> sorted(x.combineByKey(str, add, add).collect()) [('a', '11'), ('b', '1')] """ - serializer = serializer or self.ctx.defaultSerializer if numSplits is None: numSplits = self.ctx.defaultParallelism - # Use hash() to create keys that are comparable in Java. - loadkv = self.serializer.loads - def pairify(kv): - # TODO: add method to deserialize only the key or value from - # a PairSerializer? - key = loadkv(kv)[0] - return (str(hash(key)), kv) - partitioner = self.ctx.jvm.spark.HashPartitioner(numSplits) - jrdd = self._pipePairs(pairify).partitionBy(partitioner) - pairified = PairRDD(jrdd, self.ctx, NopSerializer, self.serializer) - - loads = PairSerializer(NopSerializer, self.serializer).loads - dumpk = self.keySerializer.dumps - dumpc = serializer.dumps - - functions = [createCombiner, mergeValue, mergeCombiners, loads, dumpk, - dumpc] - jpairs = pairified._pipePairs(functions, "combine_by_key", - preservesPartitioning=True) - return PairRDD(jpairs, self.ctx, self.keySerializer, serializer) + shuffled = self.shuffle(numSplits) + functions = [createCombiner, mergeValue, mergeCombiners] + jpairs = shuffled._pipe(functions, "combine_by_key") + return RDD(jpairs, self.ctx) def groupByKey(self, numSplits=None): """ - >>> x = sc.parallelizePairs([("a", 1), ("b", 1), ("a", 1)]) + >>> x = sc.parallelize([("a", 1), ("b", 1), ("a", 1)]) >>> sorted(x.groupByKey().collect()) [('a', [1, 1]), ('b', [1])] """ @@ -360,29 +269,15 @@ class PairRDD(RDD): return self.combineByKey(createCombiner, mergeValue, mergeCombiners, numSplits) - def collectAsMap(self): - """ - >>> m = sc.parallelizePairs([(1, 2), (3, 4)]).collectAsMap() - >>> m[1] - 2 - >>> m[3] - 4 - """ - m = self._jrdd.collectAsMap() - def loads(x): - (k, v) = x - return (self.keySerializer.loads(k), self.valSerializer.loads(v)) - return dict(loads(x) for x in m.items()) - - def flatMapValues(self, f, valSerializer=None): + def flatMapValues(self, f): flat_map_fn = lambda (k, v): ((k, x) for x in f(v)) - return self.flatMapPairs(flat_map_fn, self.keySerializer, - valSerializer, True) + return self.flatMap(flat_map_fn) - def mapValues(self, f, valSerializer=None): + def mapValues(self, f): map_values_fn = lambda (k, v): (k, f(v)) - return self.mapPairs(map_values_fn, self.keySerializer, valSerializer, - True) + return self.map(map_values_fn, preservesPartitioning=True) + + # TODO: implement shuffle. # TODO: support varargs cogroup of several RDDs. 
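The combineByKey path above shuffles by key and then pipes createCombiner/mergeValue/mergeCombiners to the worker's combine_by_key step, which keeps one combiner per key within a partition. A local model of that per-partition step (the helper name is illustrative; mergeCombiners is not exercised here):

    def combine_locally(pairs, createCombiner, mergeValue):
        # one combiner per key, as in the worker's do_combine_by_key loop
        combiners = {}
        for key, value in pairs:
            if key not in combiners:
                combiners[key] = createCombiner(value)
            else:
                combiners[key] = mergeValue(combiners[key], value)
        return combiners

    # combine_locally([("a", 1), ("b", 1), ("a", 1)], str, lambda a, b: a + str(b))
    # -> {'a': '11', 'b': '1'}, matching the combineByKey doctest above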
def groupWith(self, other): @@ -390,20 +285,12 @@ class PairRDD(RDD): def cogroup(self, other, numSplits=None): """ - >>> x = sc.parallelizePairs([("a", 1), ("b", 4)]) - >>> y = sc.parallelizePairs([("a", 2)]) + >>> x = sc.parallelize([("a", 1), ("b", 4)]) + >>> y = sc.parallelize([("a", 2)]) >>> x.cogroup(y).collect() [('a', ([1], [2])), ('b', ([4], []))] """ - assert self.keySerializer.name == other.keySerializer.name - resultValSerializer = PairSerializer( - ArraySerializer(self.valSerializer), - ArraySerializer(other.valSerializer)) - if self.keySerializer.is_comparable: - return PairRDD(self._jrdd.cogroup(other._jrdd), - self.ctx, self.keySerializer, resultValSerializer) - else: - return python_cogroup(self, other, numSplits) + return python_cogroup(self, other, numSplits) # TODO: `lookup` is disabled because we can't make direct comparisons based # on the key; we need to compare the hash of the key to the hash of the @@ -413,44 +300,84 @@ class PairRDD(RDD): # TODO: file saving -class MappedRDDBase(object): - def __init__(self, prev, func, serializer, preservesPartitioning=False): - if isinstance(prev, MappedRDDBase) and not prev.is_cached: - prev_func = prev.func - self.func = lambda x: func(prev_func(x)) - self.preservesPartitioning = \ - prev.preservesPartitioning and preservesPartitioning - self._prev_jrdd = prev._prev_jrdd - self._prev_serializer = prev._prev_serializer - else: - self.func = func - self.preservesPartitioning = preservesPartitioning - self._prev_jrdd = prev._jrdd - self._prev_serializer = prev.serializer - self.serializer = serializer or prev.ctx.defaultSerializer - self.is_cached = False - self.ctx = prev.ctx - self.prev = prev - self._jrdd_val = None - - -class MappedRDD(MappedRDDBase, RDD): +class MappedRDD(RDD): """ + Pipelined maps: >>> rdd = sc.parallelize([1, 2, 3, 4]) >>> rdd.map(lambda x: 2 * x).cache().map(lambda x: 2 * x).collect() [4, 8, 12, 16] >>> rdd.map(lambda x: 2 * x).map(lambda x: 2 * x).collect() [4, 8, 12, 16] + + Pipelined reduces: + >>> from operator import add + >>> rdd.map(lambda x: 2 * x).reduce(add) + 20 + >>> rdd.flatMap(lambda x: [x, x]).reduce(add) + 20 """ + def __init__(self, prev, func, preservesPartitioning=False, command='map'): + if isinstance(prev, MappedRDD) and not prev.is_cached: + prev_func = prev.func + if command == 'reduce': + if prev.command == 'flatmap': + def flatmap_reduce_func(x, acc): + values = prev_func(x) + if values is None: + return acc + if not acc: + if len(values) == 1: + return values[0] + else: + return reduce(func, values[1:], values[0]) + else: + return reduce(func, values, acc) + self.func = flatmap_reduce_func + else: + def reduce_func(x, acc): + val = prev_func(x) + if not val: + return acc + if acc is None: + return val + else: + return func(val, acc) + self.func = reduce_func + else: + if prev.command == 'flatmap': + command = 'flatmap' + self.func = lambda x: (func(y) for y in prev_func(x)) + else: + self.func = lambda x: func(prev_func(x)) + + self.preservesPartitioning = \ + prev.preservesPartitioning and preservesPartitioning + self._prev_jrdd = prev._prev_jrdd + self.is_pipelined = True + else: + if command == 'reduce': + def reduce_func(val, acc): + if acc is None: + return val + else: + return func(val, acc) + self.func = reduce_func + else: + self.func = func + self.preservesPartitioning = preservesPartitioning + self._prev_jrdd = prev._jrdd + self.is_pipelined = False + self.is_cached = False + self.ctx = prev.ctx + self.prev = prev + self._jrdd_val = None + self.command = command 
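MappedRDD above pipelines chained transformations by composing the Python functions on the driver, so a map followed by a reduce runs as a single worker command; for command='reduce' it wraps the previous map into an accumulator-style function. A simplified sketch of that composition (it ignores the flatmap branch and the falsy-value guard handled above; names are illustrative):

    def pipeline_map_then_reduce(map_func, reduce_op):
        # compose a map step with a running reduce, roughly as MappedRDD.__init__ does
        def reduce_func(x, acc):
            val = map_func(x)
            if acc is None:
                return val
            return reduce_op(val, acc)
        return reduce_func

    step = pipeline_map_then_reduce(lambda x: 2 * x, lambda a, b: a + b)
    acc = None
    for record in [1, 2, 3, 4]:
        acc = step(record, acc)
    # acc == 20, matching the pipelined-reduce doctest above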
@property def _jrdd(self): if not self._jrdd_val: - udf = self.func - loads = self._prev_serializer.loads - dumps = self.serializer.dumps - func = lambda x: dumps(udf(loads(x))) - pipe_command = RDD._get_pipe_command("map", [func]) + funcs = [self.func] + pipe_command = RDD._get_pipe_command(self.command, funcs) class_manifest = self._prev_jrdd.classManifest() python_rdd = self.ctx.jvm.PythonRDD(self._prev_jrdd.rdd(), pipe_command, self.preservesPartitioning, self.ctx.pythonExec, @@ -459,56 +386,11 @@ class MappedRDD(MappedRDDBase, RDD): return self._jrdd_val -class PairMappedRDD(MappedRDDBase, PairRDD): - """ - >>> rdd = sc.parallelize([1, 2, 3, 4]) - >>> rdd.mapPairs(lambda x: (x, x)) \\ - ... .mapPairs(lambda (x, y): (2*x, 2*y)) \\ - ... .collect() - [(2, 2), (4, 4), (6, 6), (8, 8)] - >>> rdd.mapPairs(lambda x: (x, x)) \\ - ... .mapPairs(lambda (x, y): (2*x, 2*y)) \\ - ... .map(lambda (x, _): x).collect() - [2, 4, 6, 8] - """ - - def __init__(self, prev, func, keySerializer=None, valSerializer=None, - preservesPartitioning=False): - self.keySerializer = keySerializer or prev.ctx.defaultSerializer - self.valSerializer = valSerializer or prev.ctx.defaultSerializer - serializer = PairSerializer(self.keySerializer, self.valSerializer) - MappedRDDBase.__init__(self, prev, func, serializer, - preservesPartitioning) - - @property - def _jrdd(self): - if not self._jrdd_val: - udf = self.func - loads = self._prev_serializer.loads - dumpk = self.keySerializer.dumps - dumpv = self.valSerializer.dumps - def func(x): - (k, v) = udf(loads(x)) - return (dumpk(k), dumpv(v)) - pipe_command = RDD._get_pipe_command("mapPairs", [func]) - class_manifest = self._prev_jrdd.classManifest() - self._jrdd_val = self.ctx.jvm.PythonPairRDD(self._prev_jrdd.rdd(), - pipe_command, self.preservesPartitioning, self.ctx.pythonExec, - class_manifest).asJavaPairRDD() - return self._jrdd_val - - def _test(): import doctest from pyspark.context import SparkContext - from pyspark.serializers import PickleSerializer, JSONSerializer globs = globals().copy() - globs['sc'] = SparkContext('local', 'PythonTest', - defaultSerializer=JSONSerializer) - doctest.testmod(globs=globs) - globs['sc'].stop() - globs['sc'] = SparkContext('local', 'PythonTest', - defaultSerializer=PickleSerializer) + globs['sc'] = SparkContext('local', 'PythonTest') doctest.testmod(globs=globs) globs['sc'].stop() diff --git a/pyspark/pyspark/serializers.py b/pyspark/pyspark/serializers.py index b113f5656b..7b3e6966e1 100644 --- a/pyspark/pyspark/serializers.py +++ b/pyspark/pyspark/serializers.py @@ -2,228 +2,35 @@ Data serialization methods. The Spark Python API is built on top of the Spark Java API. RDDs created in -Python are stored in Java as RDDs of Strings. Python objects are automatically -serialized/deserialized, so this representation is transparent to the end-user. - ------------------- -Serializer objects ------------------- - -`Serializer` objects are used to customize how an RDD's values are serialized. - -Each `Serializer` is a named tuple with four fields: - - - A `dumps` function, for serializing a Python object to a string. - - - A `loads` function, for deserializing a Python object from a string. - - - An `is_comparable` field, True if equal Python objects are serialized to - equal strings, and False otherwise. - - - A `name` field, used to identify the Serializer. Serializers are - compared for equality by comparing their names. - -The serializer's output should be base64-encoded. 
- ------------------------------------------------------------------- -`is_comparable`: comparing serialized representations for equality ------------------------------------------------------------------- - -If `is_comparable` is False, the serializer's representations of equal objects -are not required to be equal: - ->>> import pickle ->>> a = {1: 0, 9: 0} ->>> b = {9: 0, 1: 0} ->>> a == b -True ->>> pickle.dumps(a) == pickle.dumps(b) -False - -RDDs with comparable serializers can use native Java implementations of -operations like join() and distinct(), which may lead to better performance by -eliminating deserialization and Python comparisons. - -The default JSONSerializer produces comparable representations of common Python -data structures. - --------------------------------------- -Examples of serialized representations --------------------------------------- - -The RDD transformations that use Python UDFs are implemented in terms of -a modified `PipedRDD.pipe()` function. For each record `x` in the RDD, the -`pipe()` function pipes `x.toString()` to a Python worker process, which -deserializes the string into a Python object, executes user-defined functions, -and outputs serialized Python objects. - -The regular `toString()` method returns an ambiguous representation, due to the -way that Scala `Option` instances are printed: - ->>> from context import SparkContext ->>> sc = SparkContext("local", "SerializerDocs") ->>> x = sc.parallelizePairs([("a", 1), ("b", 4)]) ->>> y = sc.parallelizePairs([("a", 2)]) - ->>> print y.rightOuterJoin(x)._jrdd.first().toString() -(ImEi,(Some(Mg==),MQ==)) - -In Java, preprocessing is performed to handle Option instances, so the Python -process receives unambiguous input: - ->>> print sc.python_dump(y.rightOuterJoin(x)._jrdd.first()) -(ImEi,(Mg==,MQ==)) - -The base64-encoding eliminates the need to escape newlines, parentheses and -other special characters. - ----------------------- -Serializer composition ----------------------- - -In order to handle nested structures, which could contain object serialized -with different serializers, the RDD module composes serializers. For example, -the serializers in the previous example are: - ->>> print x.serializer.name -PairSerializer - ->>> print y.serializer.name -PairSerializer - ->>> print y.rightOuterJoin(x).serializer.name -PairSerializer, JSONSerializer>> +Python are stored in Java as RDD[Array[Byte]]. Python objects are +automatically serialized/deserialized, so this representation is transparent to +the end-user. """ -from base64 import standard_b64encode, standard_b64decode from collections import namedtuple import cPickle -import simplejson +import struct -Serializer = namedtuple("Serializer", - ["dumps","loads", "is_comparable", "name"]) - - -NopSerializer = Serializer(str, str, True, "NopSerializer") - - -JSONSerializer = Serializer( - lambda obj: standard_b64encode(simplejson.dumps(obj, sort_keys=True, - separators=(',', ':'))), - lambda s: simplejson.loads(standard_b64decode(s)), - True, - "JSONSerializer" -) +Serializer = namedtuple("Serializer", ["dumps","loads"]) PickleSerializer = Serializer( - lambda obj: standard_b64encode(cPickle.dumps(obj)), - lambda s: cPickle.loads(standard_b64decode(s)), - False, - "PickleSerializer" -) + lambda obj: cPickle.dumps(obj, -1), + cPickle.loads) -def OptionSerializer(serializer): - """ - >>> ser = OptionSerializer(NopSerializer) - >>> ser.loads(ser.dumps("Hello, World!")) - 'Hello, World!' 
- >>> ser.loads(ser.dumps(None)) is None - True - """ - none_placeholder = '*' - - def dumps(x): - if x is None: - return none_placeholder - else: - return serializer.dumps(x) - - def loads(x): - if x == none_placeholder: - return None - else: - return serializer.loads(x) - - name = "OptionSerializer<%s>" % serializer.name - return Serializer(dumps, loads, serializer.is_comparable, name) +def dumps(obj, stream): + # TODO: determining the length of non-byte objects. + stream.write(struct.pack("!i", len(obj))) + stream.write(obj) -def PairSerializer(keySerializer, valSerializer): - """ - Returns a Serializer for a (key, value) pair. - - >>> ser = PairSerializer(JSONSerializer, JSONSerializer) - >>> ser.loads(ser.dumps((1, 2))) - (1, 2) - - >>> ser = PairSerializer(JSONSerializer, ser) - >>> ser.loads(ser.dumps((1, (2, 3)))) - (1, (2, 3)) - """ - def loads(kv): - try: - (key, val) = kv[1:-1].split(',', 1) - key = keySerializer.loads(key) - val = valSerializer.loads(val) - return (key, val) - except: - print "Error in deserializing pair from '%s'" % str(kv) - raise - - def dumps(kv): - (key, val) = kv - return"(%s,%s)" % (keySerializer.dumps(key), valSerializer.dumps(val)) - is_comparable = \ - keySerializer.is_comparable and valSerializer.is_comparable - name = "PairSerializer<%s, %s>" % (keySerializer.name, valSerializer.name) - return Serializer(dumps, loads, is_comparable, name) - - -def ArraySerializer(serializer): - """ - >>> ser = ArraySerializer(JSONSerializer) - >>> ser.loads(ser.dumps([1, 2, 3, 4])) - [1, 2, 3, 4] - >>> ser = ArraySerializer(PairSerializer(JSONSerializer, PickleSerializer)) - >>> ser.loads(ser.dumps([('a', 1), ('b', 2)])) - [('a', 1), ('b', 2)] - >>> ser.loads(ser.dumps([('a', 1)])) - [('a', 1)] - >>> ser.loads(ser.dumps([])) - [] - """ - def dumps(arr): - if arr == []: - return '[]' - else: - return '[' + '|'.join(serializer.dumps(x) for x in arr) + ']' - - def loads(s): - if s == '[]': - return [] - items = s[1:-1] - if '|' in items: - items = items.split('|') - else: - items = [items] - return [serializer.loads(x) for x in items] - - name = "ArraySerializer<%s>" % serializer.name - return Serializer(dumps, loads, serializer.is_comparable, name) - - -# TODO: IntegerSerializer - - -# TODO: DoubleSerializer - - -def _test(): - import doctest - doctest.testmod() - - -if __name__ == "__main__": - _test() +def loads(stream): + length = stream.read(4) + if length == "": + raise EOFError + length = struct.unpack("!i", length)[0] + obj = stream.read(length) + if obj == "": + raise EOFError + return obj diff --git a/pyspark/pyspark/worker.py b/pyspark/pyspark/worker.py index 4c4b02fce4..21ff84fb17 100644 --- a/pyspark/pyspark/worker.py +++ b/pyspark/pyspark/worker.py @@ -6,9 +6,9 @@ from base64 import standard_b64decode # CloudPickler needs to be imported so that depicklers are registered using the # copy_reg module. from pyspark.cloudpickle import CloudPickler +from pyspark.serializers import dumps, loads, PickleSerializer import cPickle - # Redirect stdout to stderr so that users must return values from functions. 
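The rewritten serializers.py frames every record as a 4-byte big-endian length (struct.pack("!i", ...)) followed by the raw pickled bytes, matching the DataOutputStream.writeInt framing on the Scala side; worker.py below reads that stream from stdin. A self-contained round trip of the framing, with BytesIO standing in for the worker pipe (illustration only):

    import struct
    from io import BytesIO   # stand-in for the worker's stdin/stdout pipe

    def write_framed(payload, stream):
        # 4-byte big-endian length prefix, then the pickled bytes
        stream.write(struct.pack("!i", len(payload)))
        stream.write(payload)

    def read_framed(stream):
        header = stream.read(4)
        if len(header) < 4:
            raise EOFError
        (length,) = struct.unpack("!i", header)
        return stream.read(length)

    pipe = BytesIO()
    write_framed(b"pickled-bytes-go-here", pipe)
    pipe.seek(0)
    assert read_framed(pipe) == b"pickled-bytes-go-here"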
old_stdout = sys.stdout sys.stdout = sys.stderr @@ -19,58 +19,64 @@ def load_function(): def output(x): - for line in x.split("\n"): - old_stdout.write(line.rstrip("\r\n") + "\n") + dumps(x, old_stdout) def read_input(): - for line in sys.stdin: - yield line.rstrip("\r\n") - + try: + while True: + yield loads(sys.stdin) + except EOFError: + return def do_combine_by_key(): create_combiner = load_function() merge_value = load_function() merge_combiners = load_function() # TODO: not used. - depickler = load_function() - key_pickler = load_function() - combiner_pickler = load_function() combiners = {} - for line in read_input(): - # Discard the hashcode added in the Python combineByKey() method. - (key, value) = depickler(line)[1] + for obj in read_input(): + (key, value) = PickleSerializer.loads(obj) if key not in combiners: combiners[key] = create_combiner(value) else: combiners[key] = merge_value(combiners[key], value) for (key, combiner) in combiners.iteritems(): - output(key_pickler(key)) - output(combiner_pickler(combiner)) + output(PickleSerializer.dumps((key, combiner))) -def do_map(map_pairs=False): +def do_map(flat=False): f = load_function() - for line in read_input(): + for obj in read_input(): try: - out = f(line) + #from pickletools import dis + #print repr(obj) + #print dis(obj) + out = f(PickleSerializer.loads(obj)) if out is not None: - if map_pairs: + if flat: for x in out: - output(x) + output(PickleSerializer.dumps(x)) else: - output(out) + output(PickleSerializer.dumps(out)) except: - sys.stderr.write("Error processing line '%s'\n" % line) + sys.stderr.write("Error processing obj %s\n" % repr(obj)) raise +def do_shuffle_map_step(): + for obj in read_input(): + key = PickleSerializer.loads(obj)[1] + output(str(hash(key))) + output(obj) + + def do_reduce(): f = load_function() - dumps = load_function() acc = None - for line in read_input(): - acc = f(line, acc) - output(dumps(acc)) + for obj in read_input(): + acc = f(PickleSerializer.loads(obj), acc) + if acc is not None: + output(PickleSerializer.dumps(acc)) def do_echo(): @@ -80,13 +86,15 @@ def do_echo(): def main(): command = sys.stdin.readline().strip() if command == "map": - do_map(map_pairs=False) - elif command == "mapPairs": - do_map(map_pairs=True) + do_map(flat=False) + elif command == "flatmap": + do_map(flat=True) elif command == "combine_by_key": do_combine_by_key() elif command == "reduce": do_reduce() + elif command == "shuffle_map_step": + do_shuffle_map_step() elif command == "echo": do_echo() else: From 607b53abfca049e7d9139e2d29893a3bb252de19 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Wed, 22 Aug 2012 00:43:55 -0700 Subject: [PATCH 004/291] Use numpy in Python k-means example. 
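For reference, the serializers.py rewrite above replaces the base64 text framing with raw cPickle plus a 4-byte big-endian length prefix per record. A minimal round trip of that framing (error handling elided), using StringIO as a stand-in for the worker's stdin/stdout pipe; the buffer is only for illustration, the real code reads and writes the subprocess streams:

    import struct
    from StringIO import StringIO

    def dumps(obj, stream):
        # Length-prefix the bytes with a 4-byte big-endian integer.
        stream.write(struct.pack("!i", len(obj)))
        stream.write(obj)

    def loads(stream):
        length = struct.unpack("!i", stream.read(4))[0]
        return stream.read(length)

    buf = StringIO()                     # stand-in for the JVM <-> worker pipe
    dumps("pickled-bytes-go-here", buf)
    buf.seek(0)
    print loads(buf)                     # pickled-bytes-go-here
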
--- .../scala/spark/api/python/PythonRDD.scala | 8 ++++++- pyspark/pyspark/examples/kmeans.py | 23 +++++++------------ pyspark/pyspark/rdd.py | 9 +++----- pyspark/pyspark/worker.py | 8 +++---- 4 files changed, 21 insertions(+), 27 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index b9a0168d18..93847e2f14 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -101,7 +101,13 @@ trait PythonRDDBase { stream.readFully(obj) obj } catch { - case eof: EOFException => { new Array[Byte](0) } + case eof: EOFException => { + val exitStatus = proc.waitFor() + if (exitStatus != 0) { + throw new Exception("Subprocess exited with status " + exitStatus) + } + new Array[Byte](0) + } case e => throw e } } diff --git a/pyspark/pyspark/examples/kmeans.py b/pyspark/pyspark/examples/kmeans.py index 0761d6e395..9cc366f03c 100644 --- a/pyspark/pyspark/examples/kmeans.py +++ b/pyspark/pyspark/examples/kmeans.py @@ -1,25 +1,18 @@ import sys from pyspark.context import SparkContext +from numpy import array, sum as np_sum def parseVector(line): - return [float(x) for x in line.split(' ')] - - -def addVec(x, y): - return [a + b for (a, b) in zip(x, y)] - - -def squaredDist(x, y): - return sum((a - b) ** 2 for (a, b) in zip(x, y)) + return array([float(x) for x in line.split(' ')]) def closestPoint(p, centers): bestIndex = 0 closest = float("+inf") for i in range(len(centers)): - tempDist = squaredDist(p, centers[i]) + tempDist = np_sum((p - centers[i]) ** 2) if tempDist < closest: closest = tempDist bestIndex = i @@ -41,14 +34,14 @@ if __name__ == "__main__": tempDist = 1.0 while tempDist > convergeDist: - closest = data.mapPairs( + closest = data.map( lambda p : (closestPoint(p, kPoints), (p, 1))) pointStats = closest.reduceByKey( - lambda (x1, y1), (x2, y2): (addVec(x1, x2), y1 + y2)) - newPoints = pointStats.mapPairs( - lambda (x, (y, z)): (x, [a / z for a in y])).collect() + lambda (x1, y1), (x2, y2): (x1 + x2, y1 + y2)) + newPoints = pointStats.map( + lambda (x, (y, z)): (x, y / z)).collect() - tempDist = sum(squaredDist(kPoints[x], y) for (x, y) in newPoints) + tempDist = sum(np_sum((kPoints[x] - y) ** 2) for (x, y) in newPoints) for (x, y) in newPoints: kPoints[x] = y diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 8eccddc0a2..ff9c483032 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -71,7 +71,7 @@ class RDD(object): def takeSample(self, withReplacement, num, seed): vals = self._jrdd.takeSample(withReplacement, num, seed) - return [PickleSerializer.loads(x) for x in vals] + return [PickleSerializer.loads(bytes(x)) for x in vals] def union(self, other): """ @@ -218,17 +218,16 @@ class RDD(object): # TODO: pipelining # TODO: optimizations - def shuffle(self, numSplits): + def shuffle(self, numSplits, hashFunc=hash): if numSplits is None: numSplits = self.ctx.defaultParallelism - pipe_command = RDD._get_pipe_command('shuffle_map_step', []) + pipe_command = RDD._get_pipe_command('shuffle_map_step', [hashFunc]) class_manifest = self._jrdd.classManifest() python_rdd = self.ctx.jvm.PythonPairRDD(self._jrdd.rdd(), pipe_command, False, self.ctx.pythonExec, class_manifest) partitioner = self.ctx.jvm.spark.HashPartitioner(numSplits) jrdd = python_rdd.asJavaPairRDD().partitionBy(partitioner) jrdd = jrdd.map(self.ctx.jvm.ExtractValue()) - # TODO: extract second value. 
return RDD(jrdd, self.ctx) @@ -277,8 +276,6 @@ class RDD(object): map_values_fn = lambda (k, v): (k, f(v)) return self.map(map_values_fn, preservesPartitioning=True) - # TODO: implement shuffle. - # TODO: support varargs cogroup of several RDDs. def groupWith(self, other): return self.cogroup(other) diff --git a/pyspark/pyspark/worker.py b/pyspark/pyspark/worker.py index 21ff84fb17..b13ed5699a 100644 --- a/pyspark/pyspark/worker.py +++ b/pyspark/pyspark/worker.py @@ -48,9 +48,6 @@ def do_map(flat=False): f = load_function() for obj in read_input(): try: - #from pickletools import dis - #print repr(obj) - #print dis(obj) out = f(PickleSerializer.loads(obj)) if out is not None: if flat: @@ -64,9 +61,10 @@ def do_map(flat=False): def do_shuffle_map_step(): + hashFunc = load_function() for obj in read_input(): - key = PickleSerializer.loads(obj)[1] - output(str(hash(key))) + key = PickleSerializer.loads(obj)[0] + output(str(hashFunc(key))) output(obj) From 4b523004877cf94152225484de7683e9d17cdb56 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 23 Aug 2012 15:54:15 -0700 Subject: [PATCH 005/291] Fix options parsing in Python pi example. --- pyspark/pyspark/examples/pi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyspark/pyspark/examples/pi.py b/pyspark/pyspark/examples/pi.py index ad77694c41..fe63d2c952 100644 --- a/pyspark/pyspark/examples/pi.py +++ b/pyspark/pyspark/examples/pi.py @@ -10,7 +10,7 @@ if __name__ == "__main__": "Usage: PythonPi []" exit(-1) sc = SparkContext(sys.argv[1], "PythonKMeans") - slices = sys.argv[2] if len(sys.argv) > 2 else 2 + slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2 n = 100000 * slices def f(_): x = random() * 2 - 1 From f3b852ce66d193e3421eeecef71ea27bff73a94b Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 24 Aug 2012 19:38:50 -0700 Subject: [PATCH 006/291] Refactor Python MappedRDD to use iterator pipelines. 
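The refactoring in this patch turns each transformation into a function from an iterator to an iterator, so consecutive map/filter/flatMap stages compose into a single function that is shipped to the worker in one step. A rough standalone illustration of that composition, outside Spark (the stage helpers here are invented for the example):

    from itertools import ifilter, imap

    def map_stage(f):
        return lambda iterator: imap(f, iterator)

    def filter_stage(f):
        return lambda iterator: ifilter(f, iterator)

    def compose(prev_func, func):
        # Same shape as the patch's pipeline_func: func(prev_func(iterator)),
        # i.e. one pass over the data with no intermediate lists.
        return lambda iterator: func(prev_func(iterator))

    pipeline = compose(map_stage(lambda x: x * 2), filter_stage(lambda x: x > 4))
    print list(pipeline(iter([1, 2, 3, 4])))   # [6, 8]
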
--- pyspark/pyspark/rdd.py | 83 ++++++++++++--------------------------- pyspark/pyspark/worker.py | 55 +++++++------------------- 2 files changed, 41 insertions(+), 97 deletions(-) diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index ff9c483032..7d280d8844 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -1,4 +1,5 @@ from base64 import standard_b64encode as b64enc +from itertools import chain, ifilter, imap from pyspark import cloudpickle from pyspark.serializers import PickleSerializer @@ -15,8 +16,6 @@ class RDD(object): @classmethod def _get_pipe_command(cls, command, functions): - if functions and not isinstance(functions, (list, tuple)): - functions = [functions] worker_args = [command] for f in functions: worker_args.append(b64enc(cloudpickle.dumps(f))) @@ -28,7 +27,8 @@ class RDD(object): return self def map(self, f, preservesPartitioning=False): - return MappedRDD(self, f, preservesPartitioning) + def func(iterator): return imap(f, iterator) + return PipelinedRDD(self, func, preservesPartitioning) def flatMap(self, f): """ @@ -38,7 +38,8 @@ class RDD(object): >>> sorted(rdd.flatMap(lambda x: [(x, x), (x, x)]).collect()) [(2, 2), (2, 2), (3, 3), (3, 3), (4, 4), (4, 4)] """ - return MappedRDD(self, f, preservesPartitioning=False, command='flatmap') + def func(iterator): return chain.from_iterable(imap(f, iterator)) + return PipelinedRDD(self, func) def filter(self, f): """ @@ -46,10 +47,10 @@ class RDD(object): >>> rdd.filter(lambda x: x % 2 == 0).collect() [2, 4] """ - def filter_func(x): return x if f(x) else None - return RDD(self._pipe(filter_func), self.ctx) + def func(iterator): return ifilter(f, iterator) + return PipelinedRDD(self, func) - def _pipe(self, functions, command="map"): + def _pipe(self, functions, command): class_manifest = self._jrdd.classManifest() pipe_command = RDD._get_pipe_command(command, functions) python_rdd = self.ctx.jvm.PythonRDD(self._jrdd.rdd(), pipe_command, @@ -128,7 +129,16 @@ class RDD(object): >>> sc.parallelize((2 for _ in range(10))).map(lambda x: 1).cache().reduce(add) 10 """ - vals = MappedRDD(self, f, command="reduce", preservesPartitioning=False).collect() + def func(iterator): + acc = None + for obj in iterator: + if acc is None: + acc = obj + else: + acc = f(obj, acc) + if acc is not None: + yield acc + vals = PipelinedRDD(self, func).collect() return reduce(f, vals) # TODO: fold @@ -230,8 +240,6 @@ class RDD(object): jrdd = jrdd.map(self.ctx.jvm.ExtractValue()) return RDD(jrdd, self.ctx) - - def combineByKey(self, createCombiner, mergeValue, mergeCombiners, numSplits=None): """ @@ -297,7 +305,7 @@ class RDD(object): # TODO: file saving -class MappedRDD(RDD): +class PipelinedRDD(RDD): """ Pipelined maps: >>> rdd = sc.parallelize([1, 2, 3, 4]) @@ -313,68 +321,29 @@ class MappedRDD(RDD): >>> rdd.flatMap(lambda x: [x, x]).reduce(add) 20 """ - def __init__(self, prev, func, preservesPartitioning=False, command='map'): - if isinstance(prev, MappedRDD) and not prev.is_cached: + def __init__(self, prev, func, preservesPartitioning=False): + if isinstance(prev, PipelinedRDD) and not prev.is_cached: prev_func = prev.func - if command == 'reduce': - if prev.command == 'flatmap': - def flatmap_reduce_func(x, acc): - values = prev_func(x) - if values is None: - return acc - if not acc: - if len(values) == 1: - return values[0] - else: - return reduce(func, values[1:], values[0]) - else: - return reduce(func, values, acc) - self.func = flatmap_reduce_func - else: - def reduce_func(x, acc): - val = prev_func(x) - if 
not val: - return acc - if acc is None: - return val - else: - return func(val, acc) - self.func = reduce_func - else: - if prev.command == 'flatmap': - command = 'flatmap' - self.func = lambda x: (func(y) for y in prev_func(x)) - else: - self.func = lambda x: func(prev_func(x)) - + def pipeline_func(iterator): + return func(prev_func(iterator)) + self.func = pipeline_func self.preservesPartitioning = \ prev.preservesPartitioning and preservesPartitioning self._prev_jrdd = prev._prev_jrdd - self.is_pipelined = True else: - if command == 'reduce': - def reduce_func(val, acc): - if acc is None: - return val - else: - return func(val, acc) - self.func = reduce_func - else: - self.func = func + self.func = func self.preservesPartitioning = preservesPartitioning self._prev_jrdd = prev._jrdd - self.is_pipelined = False self.is_cached = False self.ctx = prev.ctx self.prev = prev self._jrdd_val = None - self.command = command @property def _jrdd(self): if not self._jrdd_val: funcs = [self.func] - pipe_command = RDD._get_pipe_command(self.command, funcs) + pipe_command = RDD._get_pipe_command("pipeline", funcs) class_manifest = self._prev_jrdd.classManifest() python_rdd = self.ctx.jvm.PythonRDD(self._prev_jrdd.rdd(), pipe_command, self.preservesPartitioning, self.ctx.pythonExec, diff --git a/pyspark/pyspark/worker.py b/pyspark/pyspark/worker.py index b13ed5699a..76b09918e7 100644 --- a/pyspark/pyspark/worker.py +++ b/pyspark/pyspark/worker.py @@ -25,17 +25,17 @@ def output(x): def read_input(): try: while True: - yield loads(sys.stdin) + yield cPickle.loads(loads(sys.stdin)) except EOFError: return + def do_combine_by_key(): create_combiner = load_function() merge_value = load_function() merge_combiners = load_function() # TODO: not used. combiners = {} - for obj in read_input(): - (key, value) = PickleSerializer.loads(obj) + for (key, value) in read_input(): if key not in combiners: combiners[key] = create_combiner(value) else: @@ -44,57 +44,32 @@ def do_combine_by_key(): output(PickleSerializer.dumps((key, combiner))) -def do_map(flat=False): +def do_pipeline(): f = load_function() - for obj in read_input(): - try: - out = f(PickleSerializer.loads(obj)) - if out is not None: - if flat: - for x in out: - output(PickleSerializer.dumps(x)) - else: - output(PickleSerializer.dumps(out)) - except: - sys.stderr.write("Error processing obj %s\n" % repr(obj)) - raise + for obj in f(read_input()): + output(PickleSerializer.dumps(obj)) def do_shuffle_map_step(): hashFunc = load_function() - for obj in read_input(): - key = PickleSerializer.loads(obj)[0] + while True: + try: + pickled = loads(sys.stdin) + except EOFError: + return + key = cPickle.loads(pickled)[0] output(str(hashFunc(key))) - output(obj) - - -def do_reduce(): - f = load_function() - acc = None - for obj in read_input(): - acc = f(PickleSerializer.loads(obj), acc) - if acc is not None: - output(PickleSerializer.dumps(acc)) - - -def do_echo(): - old_stdout.writelines(sys.stdin.readlines()) + output(pickled) def main(): command = sys.stdin.readline().strip() - if command == "map": - do_map(flat=False) - elif command == "flatmap": - do_map(flat=True) + if command == "pipeline": + do_pipeline() elif command == "combine_by_key": do_combine_by_key() - elif command == "reduce": - do_reduce() elif command == "shuffle_map_step": do_shuffle_map_step() - elif command == "echo": - do_echo() else: raise Exception("Unsupported command %s" % command) From 65e8406029a0fe1e1c5c5d033d335b43f6743a04 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 24 Aug 
2012 21:07:26 -0700 Subject: [PATCH 007/291] Implement fold() in Python API. --- pyspark/pyspark/rdd.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 7d280d8844..af7703fdfc 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -141,7 +141,25 @@ class RDD(object): vals = PipelinedRDD(self, func).collect() return reduce(f, vals) - # TODO: fold + def fold(self, zeroValue, op): + """ + Aggregate the elements of each partition, and then the results for all + the partitions, using a given associative function and a neutral "zero + value." The function op(t1, t2) is allowed to modify t1 and return it + as its result value to avoid object allocation; however, it should not + modify t2. + + >>> from operator import add + >>> sc.parallelize([1, 2, 3, 4, 5]).fold(0, add) + 15 + """ + def func(iterator): + acc = zeroValue + for obj in iterator: + acc = op(obj, acc) + yield acc + vals = PipelinedRDD(self, func).collect() + return reduce(op, vals, zeroValue) # TODO: aggregate From f79a1e4d2a8643157136de69b8d7de84f0034712 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 25 Aug 2012 13:59:01 -0700 Subject: [PATCH 008/291] Add broadcast variables to Python API. --- .../scala/spark/api/python/PythonRDD.scala | 43 ++++++++++------- pyspark/pyspark/broadcast.py | 46 +++++++++++++++++++ pyspark/pyspark/context.py | 17 +++++-- pyspark/pyspark/rdd.py | 27 +++++++---- pyspark/pyspark/worker.py | 6 +++ 5 files changed, 110 insertions(+), 29 deletions(-) create mode 100644 pyspark/pyspark/broadcast.py diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 93847e2f14..5163812df4 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -7,14 +7,13 @@ import scala.collection.JavaConversions._ import scala.io.Source import spark._ import api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} -import scala.{collection, Some} -import collection.parallel.mutable +import broadcast.Broadcast import scala.collection -import scala.Some trait PythonRDDBase { def compute[T](split: Split, envVars: Map[String, String], - command: Seq[String], parent: RDD[T], pythonExec: String): Iterator[Array[Byte]] = { + command: Seq[String], parent: RDD[T], pythonExec: String, + broadcastVars: java.util.List[Broadcast[Array[Byte]]]): Iterator[Array[Byte]] = { val SPARK_HOME = new ProcessBuilder().environment().get("SPARK_HOME") val pb = new ProcessBuilder(Seq(pythonExec, SPARK_HOME + "/pyspark/pyspark/worker.py")) @@ -42,11 +41,18 @@ trait PythonRDDBase { override def run() { SparkEnv.set(env) val out = new PrintWriter(proc.getOutputStream) + val dOut = new DataOutputStream(proc.getOutputStream) + out.println(broadcastVars.length) + for (broadcast <- broadcastVars) { + out.print(broadcast.uuid.toString) + dOut.writeInt(broadcast.value.length) + dOut.write(broadcast.value) + dOut.flush() + } for (elem <- command) { out.println(elem) } out.flush() - val dOut = new DataOutputStream(proc.getOutputStream) for (elem <- parent.iterator(split)) { if (elem.isInstanceOf[Array[Byte]]) { val arr = elem.asInstanceOf[Array[Byte]] @@ -121,16 +127,17 @@ trait PythonRDDBase { class PythonRDD[T: ClassManifest]( parent: RDD[T], command: Seq[String], envVars: Map[String, String], - preservePartitoning: Boolean, pythonExec: String) + preservePartitoning: Boolean, pythonExec: String, broadcastVars: 
java.util.List[Broadcast[Array[Byte]]]) extends RDD[Array[Byte]](parent.context) with PythonRDDBase { - def this(parent: RDD[T], command: Seq[String], preservePartitoning: Boolean, pythonExec: String) = - this(parent, command, Map(), preservePartitoning, pythonExec) + def this(parent: RDD[T], command: Seq[String], preservePartitoning: Boolean, + pythonExec: String, broadcastVars: java.util.List[Broadcast[Array[Byte]]]) = + this(parent, command, Map(), preservePartitoning, pythonExec, broadcastVars) // Similar to Runtime.exec(), if we are given a single string, split it into words // using a standard StringTokenizer (i.e. by spaces) - def this(parent: RDD[T], command: String, preservePartitoning: Boolean, pythonExec: String) = - this(parent, PipedRDD.tokenize(command), preservePartitoning, pythonExec) + def this(parent: RDD[T], command: String, preservePartitoning: Boolean, pythonExec: String, broadcastVars: java.util.List[Broadcast[Array[Byte]]]) = + this(parent, PipedRDD.tokenize(command), preservePartitoning, pythonExec, broadcastVars) override def splits = parent.splits @@ -139,23 +146,25 @@ class PythonRDD[T: ClassManifest]( override val partitioner = if (preservePartitoning) parent.partitioner else None override def compute(split: Split): Iterator[Array[Byte]] = - compute(split, envVars, command, parent, pythonExec) + compute(split, envVars, command, parent, pythonExec, broadcastVars) val asJavaRDD : JavaRDD[Array[Byte]] = JavaRDD.fromRDD(this) } class PythonPairRDD[T: ClassManifest] ( parent: RDD[T], command: Seq[String], envVars: Map[String, String], - preservePartitoning: Boolean, pythonExec: String) + preservePartitoning: Boolean, pythonExec: String, broadcastVars: java.util.List[Broadcast[Array[Byte]]]) extends RDD[(Array[Byte], Array[Byte])](parent.context) with PythonRDDBase { - def this(parent: RDD[T], command: Seq[String], preservePartitoning: Boolean, pythonExec: String) = - this(parent, command, Map(), preservePartitoning, pythonExec) + def this(parent: RDD[T], command: Seq[String], preservePartitoning: Boolean, + pythonExec: String, broadcastVars: java.util.List[Broadcast[Array[Byte]]]) = + this(parent, command, Map(), preservePartitoning, pythonExec, broadcastVars) // Similar to Runtime.exec(), if we are given a single string, split it into words // using a standard StringTokenizer (i.e. 
by spaces) - def this(parent: RDD[T], command: String, preservePartitoning: Boolean, pythonExec: String) = - this(parent, PipedRDD.tokenize(command), preservePartitoning, pythonExec) + def this(parent: RDD[T], command: String, preservePartitoning: Boolean, pythonExec: String, + broadcastVars: java.util.List[Broadcast[Array[Byte]]]) = + this(parent, PipedRDD.tokenize(command), preservePartitoning, pythonExec, broadcastVars) override def splits = parent.splits @@ -164,7 +173,7 @@ class PythonPairRDD[T: ClassManifest] ( override val partitioner = if (preservePartitoning) parent.partitioner else None override def compute(split: Split): Iterator[(Array[Byte], Array[Byte])] = { - compute(split, envVars, command, parent, pythonExec).grouped(2).map { + compute(split, envVars, command, parent, pythonExec, broadcastVars).grouped(2).map { case Seq(a, b) => (a, b) case x => throw new Exception("PythonPairRDD: unexpected value: " + x) } diff --git a/pyspark/pyspark/broadcast.py b/pyspark/pyspark/broadcast.py new file mode 100644 index 0000000000..1ea17d59af --- /dev/null +++ b/pyspark/pyspark/broadcast.py @@ -0,0 +1,46 @@ +""" +>>> from pyspark.context import SparkContext +>>> sc = SparkContext('local', 'test') +>>> b = sc.broadcast([1, 2, 3, 4, 5]) +>>> b.value +[1, 2, 3, 4, 5] + +>>> from pyspark.broadcast import _broadcastRegistry +>>> _broadcastRegistry[b.uuid] = b +>>> from cPickle import dumps, loads +>>> loads(dumps(b)).value +[1, 2, 3, 4, 5] + +>>> sc.parallelize([0, 0]).flatMap(lambda x: b.value).collect() +[1, 2, 3, 4, 5, 1, 2, 3, 4, 5] +""" +# Holds broadcasted data received from Java, keyed by UUID. +_broadcastRegistry = {} + + +def _from_uuid(uuid): + from pyspark.broadcast import _broadcastRegistry + if uuid not in _broadcastRegistry: + raise Exception("Broadcast variable '%s' not loaded!" % uuid) + return _broadcastRegistry[uuid] + + +class Broadcast(object): + def __init__(self, uuid, value, java_broadcast=None, pickle_registry=None): + self.value = value + self.uuid = uuid + self._jbroadcast = java_broadcast + self._pickle_registry = pickle_registry + + def __reduce__(self): + self._pickle_registry.add(self) + return (_from_uuid, (self.uuid, )) + + +def _test(): + import doctest + doctest.testmod() + + +if __name__ == "__main__": + _test() diff --git a/pyspark/pyspark/context.py b/pyspark/pyspark/context.py index ac7e4057e9..6f87206665 100644 --- a/pyspark/pyspark/context.py +++ b/pyspark/pyspark/context.py @@ -2,6 +2,7 @@ import os import atexit from tempfile import NamedTemporaryFile +from pyspark.broadcast import Broadcast from pyspark.java_gateway import launch_gateway from pyspark.serializers import PickleSerializer, dumps from pyspark.rdd import RDD @@ -24,6 +25,11 @@ class SparkContext(object): self.defaultParallelism = \ defaultParallelism or self._jsc.sc().defaultParallelism() self.pythonExec = pythonExec + # Broadcast's __reduce__ method stores Broadcast instances here. + # This allows other code to determine which Broadcast instances have + # been pickled, so it can determine which Java broadcast objects to + # send. 
+ self._pickled_broadcast_vars = set() def __del__(self): if self._jsc: @@ -52,7 +58,12 @@ class SparkContext(object): jrdd = self.pickleFile(self._jsc, tempFile.name, numSlices) return RDD(jrdd, self) - def textFile(self, name, numSlices=None): - numSlices = numSlices or self.defaultParallelism - jrdd = self._jsc.textFile(name, numSlices) + def textFile(self, name, minSplits=None): + minSplits = minSplits or min(self.defaultParallelism, 2) + jrdd = self._jsc.textFile(name, minSplits) return RDD(jrdd, self) + + def broadcast(self, value): + jbroadcast = self._jsc.broadcast(bytearray(PickleSerializer.dumps(value))) + return Broadcast(jbroadcast.uuid().toString(), value, jbroadcast, + self._pickled_broadcast_vars) diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index af7703fdfc..4459095391 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -6,6 +6,8 @@ from pyspark.serializers import PickleSerializer from pyspark.join import python_join, python_left_outer_join, \ python_right_outer_join, python_cogroup +from py4j.java_collections import ListConverter + class RDD(object): @@ -15,11 +17,15 @@ class RDD(object): self.ctx = ctx @classmethod - def _get_pipe_command(cls, command, functions): + def _get_pipe_command(cls, ctx, command, functions): worker_args = [command] for f in functions: worker_args.append(b64enc(cloudpickle.dumps(f))) - return " ".join(worker_args) + broadcast_vars = [x._jbroadcast for x in ctx._pickled_broadcast_vars] + broadcast_vars = ListConverter().convert(broadcast_vars, + ctx.gateway._gateway_client) + ctx._pickled_broadcast_vars.clear() + return (" ".join(worker_args), broadcast_vars) def cache(self): self.is_cached = True @@ -52,9 +58,10 @@ class RDD(object): def _pipe(self, functions, command): class_manifest = self._jrdd.classManifest() - pipe_command = RDD._get_pipe_command(command, functions) + (pipe_command, broadcast_vars) = \ + RDD._get_pipe_command(self.ctx, command, functions) python_rdd = self.ctx.jvm.PythonRDD(self._jrdd.rdd(), pipe_command, - False, self.ctx.pythonExec, class_manifest) + False, self.ctx.pythonExec, broadcast_vars, class_manifest) return python_rdd.asJavaRDD() def distinct(self): @@ -249,10 +256,12 @@ class RDD(object): def shuffle(self, numSplits, hashFunc=hash): if numSplits is None: numSplits = self.ctx.defaultParallelism - pipe_command = RDD._get_pipe_command('shuffle_map_step', [hashFunc]) + (pipe_command, broadcast_vars) = \ + RDD._get_pipe_command(self.ctx, 'shuffle_map_step', [hashFunc]) class_manifest = self._jrdd.classManifest() python_rdd = self.ctx.jvm.PythonPairRDD(self._jrdd.rdd(), - pipe_command, False, self.ctx.pythonExec, class_manifest) + pipe_command, False, self.ctx.pythonExec, broadcast_vars, + class_manifest) partitioner = self.ctx.jvm.spark.HashPartitioner(numSplits) jrdd = python_rdd.asJavaPairRDD().partitionBy(partitioner) jrdd = jrdd.map(self.ctx.jvm.ExtractValue()) @@ -360,12 +369,12 @@ class PipelinedRDD(RDD): @property def _jrdd(self): if not self._jrdd_val: - funcs = [self.func] - pipe_command = RDD._get_pipe_command("pipeline", funcs) + (pipe_command, broadcast_vars) = \ + RDD._get_pipe_command(self.ctx, "pipeline", [self.func]) class_manifest = self._prev_jrdd.classManifest() python_rdd = self.ctx.jvm.PythonRDD(self._prev_jrdd.rdd(), pipe_command, self.preservesPartitioning, self.ctx.pythonExec, - class_manifest) + broadcast_vars, class_manifest) self._jrdd_val = python_rdd.asJavaRDD() return self._jrdd_val diff --git a/pyspark/pyspark/worker.py b/pyspark/pyspark/worker.py index 
76b09918e7..7402897ac8 100644 --- a/pyspark/pyspark/worker.py +++ b/pyspark/pyspark/worker.py @@ -5,6 +5,7 @@ import sys from base64 import standard_b64decode # CloudPickler needs to be imported so that depicklers are registered using the # copy_reg module. +from pyspark.broadcast import Broadcast, _broadcastRegistry from pyspark.cloudpickle import CloudPickler from pyspark.serializers import dumps, loads, PickleSerializer import cPickle @@ -63,6 +64,11 @@ def do_shuffle_map_step(): def main(): + num_broadcast_variables = int(sys.stdin.readline().strip()) + for _ in range(num_broadcast_variables): + uuid = sys.stdin.read(36) + value = loads(sys.stdin) + _broadcastRegistry[uuid] = Broadcast(uuid, cPickle.loads(value)) command = sys.stdin.readline().strip() if command == "pipeline": do_pipeline() From 08b201d810c0dc0933d00d78ec2c1d9135e100c3 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 24 Aug 2012 22:51:45 -0700 Subject: [PATCH 009/291] Add mapPartitions(), glom(), countByValue() to Python API. --- pyspark/pyspark/rdd.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 4459095391..f0d665236a 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -1,4 +1,5 @@ from base64 import standard_b64encode as b64enc +from collections import Counter from itertools import chain, ifilter, imap from pyspark import cloudpickle @@ -47,6 +48,15 @@ class RDD(object): def func(iterator): return chain.from_iterable(imap(f, iterator)) return PipelinedRDD(self, func) + def mapPartitions(self, f): + """ + >>> rdd = sc.parallelize([1, 2, 3, 4], 2) + >>> def f(iterator): yield sum(iterator) + >>> rdd.mapPartitions(f).collect() + [3, 7] + """ + return PipelinedRDD(self, f) + def filter(self, f): """ >>> rdd = sc.parallelize([1, 2, 3, 4, 5]) @@ -93,7 +103,14 @@ class RDD(object): # TODO: Overload __add___? - # TODO: glom + def glom(self): + """ + >>> rdd = sc.parallelize([1, 2, 3, 4], 2) + >>> rdd.glom().first() + [1, 2] + """ + def func(iterator): yield list(iterator) + return PipelinedRDD(self, func) def cartesian(self, other): """ @@ -115,8 +132,6 @@ class RDD(object): # TODO: pipe - # TODO: mapPartitions - def foreach(self, f): """ >>> def f(x): print x @@ -177,7 +192,16 @@ class RDD(object): """ return self._jrdd.count() - # TODO: count approx methods + def countByValue(self): + """ + >>> sc.parallelize([1, 2, 1, 2, 2]).countByValue().most_common() + [(2, 3), (1, 2)] + """ + def countPartition(iterator): + yield Counter(iterator) + def mergeMaps(m1, m2): + return m1 + m2 + return self.mapPartitions(countPartition).reduce(mergeMaps) def take(self, num): """ From 8b64b7ecd80c52f2f09a517f1517c0ece7a3d57f Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 24 Aug 2012 23:09:15 -0700 Subject: [PATCH 010/291] Add countByKey(), reduceByKeyLocally() to Python API --- pyspark/pyspark/rdd.py | 52 +++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index f0d665236a..fd41ea0b17 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -99,9 +99,17 @@ class RDD(object): """ return RDD(self._jrdd.union(other._jrdd), self.ctx) - # TODO: sort + def __add__(self, other): + """ + >>> rdd = sc.parallelize([1, 1, 2, 3]) + >>> (rdd + rdd).collect() + [1, 1, 2, 3, 1, 1, 2, 3] + """ + if not isinstance(other, RDD): + raise TypeError + return self.union(other) - # TODO: Overload __add___? 
+ # TODO: sort def glom(self): """ @@ -120,7 +128,6 @@ class RDD(object): """ return RDD(self._jrdd.cartesian(other._jrdd), self.ctx) - # numsplits def groupBy(self, f, numSplits=None): """ >>> rdd = sc.parallelize([1, 1, 2, 3, 5, 8]) @@ -236,17 +243,38 @@ class RDD(object): def reduceByKey(self, func, numSplits=None): """ - >>> x = sc.parallelize([("a", 1), ("b", 1), ("a", 1)]) - >>> sorted(x.reduceByKey(lambda a, b: a + b).collect()) + >>> from operator import add + >>> rdd = sc.parallelize([("a", 1), ("b", 1), ("a", 1)]) + >>> sorted(rdd.reduceByKey(add).collect()) [('a', 2), ('b', 1)] """ return self.combineByKey(lambda x: x, func, func, numSplits) - # TODO: reduceByKeyLocally() + def reduceByKeyLocally(self, func): + """ + >>> from operator import add + >>> rdd = sc.parallelize([("a", 1), ("b", 1), ("a", 1)]) + >>> sorted(rdd.reduceByKeyLocally(add).items()) + [('a', 2), ('b', 1)] + """ + def reducePartition(iterator): + m = {} + for (k, v) in iterator: + m[k] = v if k not in m else func(m[k], v) + yield m + def mergeMaps(m1, m2): + for (k, v) in m2.iteritems(): + m1[k] = v if k not in m1 else func(m1[k], v) + return m1 + return self.mapPartitions(reducePartition).reduce(mergeMaps) - # TODO: countByKey() - - # TODO: partitionBy + def countByKey(self): + """ + >>> rdd = sc.parallelize([("a", 1), ("b", 1), ("a", 1)]) + >>> rdd.countByKey().most_common() + [('a', 2), ('b', 1)] + """ + return self.map(lambda x: x[0]).countByValue() def join(self, other, numSplits=None): """ @@ -277,7 +305,7 @@ class RDD(object): # TODO: pipelining # TODO: optimizations - def shuffle(self, numSplits, hashFunc=hash): + def partitionBy(self, numSplits, hashFunc=hash): if numSplits is None: numSplits = self.ctx.defaultParallelism (pipe_command, broadcast_vars) = \ @@ -302,7 +330,7 @@ class RDD(object): """ if numSplits is None: numSplits = self.ctx.defaultParallelism - shuffled = self.shuffle(numSplits) + shuffled = self.partitionBy(numSplits) functions = [createCombiner, mergeValue, mergeCombiners] jpairs = shuffled._pipe(functions, "combine_by_key") return RDD(jpairs, self.ctx) @@ -353,8 +381,6 @@ class RDD(object): # keys in the pairs. This could be an expensive operation, since those # hashes aren't retained. - # TODO: file saving - class PipelinedRDD(RDD): """ From 6904cb77d4306a14891cc71338c8f9f966d009f1 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 25 Aug 2012 14:19:07 -0700 Subject: [PATCH 011/291] Use local combiners in Python API combineByKey(). 
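The change in this patch pre-aggregates within each partition before the shuffle, so only one (key, combiner) pair per key per partition is serialized and transferred. A small plain-Python sketch of the local-combine step under that assumption (a list stands in for a partition's iterator):

    def combine_locally(iterator, create_combiner, merge_value):
        combiners = {}
        for (k, v) in iterator:
            if k not in combiners:
                combiners[k] = create_combiner(v)
            else:
                combiners[k] = merge_value(combiners[k], v)
        return combiners.iteritems()

    # Word-count style usage on a single "partition".
    pairs = [("a", 1), ("b", 1), ("a", 1)]
    print sorted(combine_locally(iter(pairs), lambda v: v, lambda c, v: c + v))
    # [('a', 2), ('b', 1)]
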
--- pyspark/pyspark/rdd.py | 33 ++++++++++++++++++++++++--------- pyspark/pyspark/worker.py | 16 ---------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index fd41ea0b17..3528b8f308 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -46,7 +46,7 @@ class RDD(object): [(2, 2), (2, 2), (3, 3), (3, 3), (4, 4), (4, 4)] """ def func(iterator): return chain.from_iterable(imap(f, iterator)) - return PipelinedRDD(self, func) + return self.mapPartitions(func) def mapPartitions(self, f): """ @@ -64,7 +64,7 @@ class RDD(object): [2, 4] """ def func(iterator): return ifilter(f, iterator) - return PipelinedRDD(self, func) + return self.mapPartitions(func) def _pipe(self, functions, command): class_manifest = self._jrdd.classManifest() @@ -118,7 +118,7 @@ class RDD(object): [1, 2] """ def func(iterator): yield list(iterator) - return PipelinedRDD(self, func) + return self.mapPartitions(func) def cartesian(self, other): """ @@ -167,7 +167,7 @@ class RDD(object): acc = f(obj, acc) if acc is not None: yield acc - vals = PipelinedRDD(self, func).collect() + vals = self.mapPartitions(func).collect() return reduce(f, vals) def fold(self, zeroValue, op): @@ -187,7 +187,7 @@ class RDD(object): for obj in iterator: acc = op(obj, acc) yield acc - vals = PipelinedRDD(self, func).collect() + vals = self.mapPartitions(func).collect() return reduce(op, vals, zeroValue) # TODO: aggregate @@ -330,10 +330,25 @@ class RDD(object): """ if numSplits is None: numSplits = self.ctx.defaultParallelism - shuffled = self.partitionBy(numSplits) - functions = [createCombiner, mergeValue, mergeCombiners] - jpairs = shuffled._pipe(functions, "combine_by_key") - return RDD(jpairs, self.ctx) + def combineLocally(iterator): + combiners = {} + for (k, v) in iterator: + if k not in combiners: + combiners[k] = createCombiner(v) + else: + combiners[k] = mergeValue(combiners[k], v) + return combiners.iteritems() + locally_combined = self.mapPartitions(combineLocally) + shuffled = locally_combined.partitionBy(numSplits) + def _mergeCombiners(iterator): + combiners = {} + for (k, v) in iterator: + if not k in combiners: + combiners[k] = v + else: + combiners[k] = mergeCombiners(combiners[k], v) + return combiners.iteritems() + return shuffled.mapPartitions(_mergeCombiners) def groupByKey(self, numSplits=None): """ diff --git a/pyspark/pyspark/worker.py b/pyspark/pyspark/worker.py index 7402897ac8..0f90c6ff46 100644 --- a/pyspark/pyspark/worker.py +++ b/pyspark/pyspark/worker.py @@ -31,20 +31,6 @@ def read_input(): return -def do_combine_by_key(): - create_combiner = load_function() - merge_value = load_function() - merge_combiners = load_function() # TODO: not used. - combiners = {} - for (key, value) in read_input(): - if key not in combiners: - combiners[key] = create_combiner(value) - else: - combiners[key] = merge_value(combiners[key], value) - for (key, combiner) in combiners.iteritems(): - output(PickleSerializer.dumps((key, combiner))) - - def do_pipeline(): f = load_function() for obj in f(read_input()): @@ -72,8 +58,6 @@ def main(): command = sys.stdin.readline().strip() if command == "pipeline": do_pipeline() - elif command == "combine_by_key": - do_combine_by_key() elif command == "shuffle_map_step": do_shuffle_map_step() else: From 200d248dcc5903295296bf897211cf543b37f8c1 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 25 Aug 2012 16:46:07 -0700 Subject: [PATCH 012/291] Simplify Python worker; pipeline the map step of partitionBy(). 
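The partitionBy() pipeline introduced here has the Python worker emit two records per key/value pair (the stringified hash, then the pickled pair), and the JVM side re-groups the stream two at a time, via grouped(2), into a pair RDD it can hash-partition. An illustrative simulation of that alternating protocol in plain Python (the function names are invented for the example):

    import cPickle

    def add_shuffle_key(iterator, hash_func=hash):
        # Two outputs per record: the partitioning key, then the payload.
        for (k, v) in iterator:
            yield str(hash_func(k))
            yield cPickle.dumps((k, v), 2)

    def group_pairs(stream):
        # Mirrors the JVM side's grouped(2): consecutive items become tuples.
        it = iter(stream)
        return zip(it, it)

    for hashed, pickled in group_pairs(add_shuffle_key(iter([("a", 1), ("b", 2)]))):
        print hashed, cPickle.loads(pickled)
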
--- .../scala/spark/api/python/PythonRDD.scala | 34 ++------- pyspark/pyspark/context.py | 9 ++- pyspark/pyspark/rdd.py | 70 +++++++------------ pyspark/pyspark/serializers.py | 23 ++---- pyspark/pyspark/worker.py | 50 ++++--------- 5 files changed, 59 insertions(+), 127 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 5163812df4..b9091fd436 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -151,38 +151,18 @@ class PythonRDD[T: ClassManifest]( val asJavaRDD : JavaRDD[Array[Byte]] = JavaRDD.fromRDD(this) } -class PythonPairRDD[T: ClassManifest] ( - parent: RDD[T], command: Seq[String], envVars: Map[String, String], - preservePartitoning: Boolean, pythonExec: String, broadcastVars: java.util.List[Broadcast[Array[Byte]]]) - extends RDD[(Array[Byte], Array[Byte])](parent.context) with PythonRDDBase { - - def this(parent: RDD[T], command: Seq[String], preservePartitoning: Boolean, - pythonExec: String, broadcastVars: java.util.List[Broadcast[Array[Byte]]]) = - this(parent, command, Map(), preservePartitoning, pythonExec, broadcastVars) - - // Similar to Runtime.exec(), if we are given a single string, split it into words - // using a standard StringTokenizer (i.e. by spaces) - def this(parent: RDD[T], command: String, preservePartitoning: Boolean, pythonExec: String, - broadcastVars: java.util.List[Broadcast[Array[Byte]]]) = - this(parent, PipedRDD.tokenize(command), preservePartitoning, pythonExec, broadcastVars) - - override def splits = parent.splits - - override val dependencies = List(new OneToOneDependency(parent)) - - override val partitioner = if (preservePartitoning) parent.partitioner else None - - override def compute(split: Split): Iterator[(Array[Byte], Array[Byte])] = { - compute(split, envVars, command, parent, pythonExec, broadcastVars).grouped(2).map { +private class PairwiseRDD(prev: RDD[Array[Byte]]) extends + RDD[(Array[Byte], Array[Byte])](prev.context) { + override def splits = prev.splits + override val dependencies = List(new OneToOneDependency(prev)) + override def compute(split: Split) = + prev.iterator(split).grouped(2).map { case Seq(a, b) => (a, b) - case x => throw new Exception("PythonPairRDD: unexpected value: " + x) + case x => throw new Exception("PairwiseRDD: unexpected value: " + x) } - } - val asJavaPairRDD : JavaPairRDD[Array[Byte], Array[Byte]] = JavaPairRDD.fromRDD(this) } - object PythonRDD { /** Strips the pickle PROTO and STOP opcodes from the start and end of a pickle */ diff --git a/pyspark/pyspark/context.py b/pyspark/pyspark/context.py index 6f87206665..b8490019e3 100644 --- a/pyspark/pyspark/context.py +++ b/pyspark/pyspark/context.py @@ -4,7 +4,7 @@ from tempfile import NamedTemporaryFile from pyspark.broadcast import Broadcast from pyspark.java_gateway import launch_gateway -from pyspark.serializers import PickleSerializer, dumps +from pyspark.serializers import dump_pickle, write_with_length from pyspark.rdd import RDD @@ -16,9 +16,8 @@ class SparkContext(object): asPickle = jvm.spark.api.python.PythonRDD.asPickle arrayAsPickle = jvm.spark.api.python.PythonRDD.arrayAsPickle - def __init__(self, master, name, defaultParallelism=None, - pythonExec='python'): + pythonExec='python'): self.master = master self.name = name self._jsc = self.jvm.JavaSparkContext(master, name) @@ -52,7 +51,7 @@ class SparkContext(object): # objects are written to a file and loaded through textFile(). 
tempFile = NamedTemporaryFile(delete=False) for x in c: - dumps(PickleSerializer.dumps(x), tempFile) + write_with_length(dump_pickle(x), tempFile) tempFile.close() atexit.register(lambda: os.unlink(tempFile.name)) jrdd = self.pickleFile(self._jsc, tempFile.name, numSlices) @@ -64,6 +63,6 @@ class SparkContext(object): return RDD(jrdd, self) def broadcast(self, value): - jbroadcast = self._jsc.broadcast(bytearray(PickleSerializer.dumps(value))) + jbroadcast = self._jsc.broadcast(bytearray(dump_pickle(value))) return Broadcast(jbroadcast.uuid().toString(), value, jbroadcast, self._pickled_broadcast_vars) diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 3528b8f308..21e822ba9f 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -3,7 +3,7 @@ from collections import Counter from itertools import chain, ifilter, imap from pyspark import cloudpickle -from pyspark.serializers import PickleSerializer +from pyspark.serializers import dump_pickle, load_pickle from pyspark.join import python_join, python_left_outer_join, \ python_right_outer_join, python_cogroup @@ -17,17 +17,6 @@ class RDD(object): self.is_cached = False self.ctx = ctx - @classmethod - def _get_pipe_command(cls, ctx, command, functions): - worker_args = [command] - for f in functions: - worker_args.append(b64enc(cloudpickle.dumps(f))) - broadcast_vars = [x._jbroadcast for x in ctx._pickled_broadcast_vars] - broadcast_vars = ListConverter().convert(broadcast_vars, - ctx.gateway._gateway_client) - ctx._pickled_broadcast_vars.clear() - return (" ".join(worker_args), broadcast_vars) - def cache(self): self.is_cached = True self._jrdd.cache() @@ -66,14 +55,6 @@ class RDD(object): def func(iterator): return ifilter(f, iterator) return self.mapPartitions(func) - def _pipe(self, functions, command): - class_manifest = self._jrdd.classManifest() - (pipe_command, broadcast_vars) = \ - RDD._get_pipe_command(self.ctx, command, functions) - python_rdd = self.ctx.jvm.PythonRDD(self._jrdd.rdd(), pipe_command, - False, self.ctx.pythonExec, broadcast_vars, class_manifest) - return python_rdd.asJavaRDD() - def distinct(self): """ >>> sorted(sc.parallelize([1, 1, 2, 3]).distinct().collect()) @@ -89,7 +70,7 @@ class RDD(object): def takeSample(self, withReplacement, num, seed): vals = self._jrdd.takeSample(withReplacement, num, seed) - return [PickleSerializer.loads(bytes(x)) for x in vals] + return [load_pickle(bytes(x)) for x in vals] def union(self, other): """ @@ -148,7 +129,7 @@ class RDD(object): def collect(self): pickle = self.ctx.arrayAsPickle(self._jrdd.rdd().collect()) - return PickleSerializer.loads(bytes(pickle)) + return load_pickle(bytes(pickle)) def reduce(self, f): """ @@ -216,19 +197,17 @@ class RDD(object): [2, 3] """ pickle = self.ctx.arrayAsPickle(self._jrdd.rdd().take(num)) - return PickleSerializer.loads(bytes(pickle)) + return load_pickle(bytes(pickle)) def first(self): """ >>> sc.parallelize([2, 3, 4]).first() 2 """ - return PickleSerializer.loads(bytes(self.ctx.asPickle(self._jrdd.first()))) + return load_pickle(bytes(self.ctx.asPickle(self._jrdd.first()))) # TODO: saveAsTextFile - # TODO: saveAsObjectFile - # Pair functions def collectAsMap(self): @@ -303,19 +282,18 @@ class RDD(object): """ return python_right_outer_join(self, other, numSplits) - # TODO: pipelining - # TODO: optimizations def partitionBy(self, numSplits, hashFunc=hash): if numSplits is None: numSplits = self.ctx.defaultParallelism - (pipe_command, broadcast_vars) = \ - RDD._get_pipe_command(self.ctx, 'shuffle_map_step', 
[hashFunc]) - class_manifest = self._jrdd.classManifest() - python_rdd = self.ctx.jvm.PythonPairRDD(self._jrdd.rdd(), - pipe_command, False, self.ctx.pythonExec, broadcast_vars, - class_manifest) + def add_shuffle_key(iterator): + for (k, v) in iterator: + yield str(hashFunc(k)) + yield dump_pickle((k, v)) + keyed = PipelinedRDD(self, add_shuffle_key) + keyed._bypass_serializer = True + pairRDD = self.ctx.jvm.PairwiseRDD(keyed._jrdd.rdd()).asJavaPairRDD() partitioner = self.ctx.jvm.spark.HashPartitioner(numSplits) - jrdd = python_rdd.asJavaPairRDD().partitionBy(partitioner) + jrdd = pairRDD.partitionBy(partitioner) jrdd = jrdd.map(self.ctx.jvm.ExtractValue()) return RDD(jrdd, self.ctx) @@ -430,17 +408,23 @@ class PipelinedRDD(RDD): self.ctx = prev.ctx self.prev = prev self._jrdd_val = None + self._bypass_serializer = False @property def _jrdd(self): - if not self._jrdd_val: - (pipe_command, broadcast_vars) = \ - RDD._get_pipe_command(self.ctx, "pipeline", [self.func]) - class_manifest = self._prev_jrdd.classManifest() - python_rdd = self.ctx.jvm.PythonRDD(self._prev_jrdd.rdd(), - pipe_command, self.preservesPartitioning, self.ctx.pythonExec, - broadcast_vars, class_manifest) - self._jrdd_val = python_rdd.asJavaRDD() + if self._jrdd_val: + return self._jrdd_val + funcs = [self.func, self._bypass_serializer] + pipe_command = ' '.join(b64enc(cloudpickle.dumps(f)) for f in funcs) + broadcast_vars = ListConverter().convert( + [x._jbroadcast for x in self.ctx._pickled_broadcast_vars], + self.ctx.gateway._gateway_client) + self.ctx._pickled_broadcast_vars.clear() + class_manifest = self._prev_jrdd.classManifest() + python_rdd = self.ctx.jvm.PythonRDD(self._prev_jrdd.rdd(), + pipe_command, self.preservesPartitioning, self.ctx.pythonExec, + broadcast_vars, class_manifest) + self._jrdd_val = python_rdd.asJavaRDD() return self._jrdd_val diff --git a/pyspark/pyspark/serializers.py b/pyspark/pyspark/serializers.py index 7b3e6966e1..faa1e683c7 100644 --- a/pyspark/pyspark/serializers.py +++ b/pyspark/pyspark/serializers.py @@ -1,31 +1,20 @@ -""" -Data serialization methods. - -The Spark Python API is built on top of the Spark Java API. RDDs created in -Python are stored in Java as RDD[Array[Byte]]. Python objects are -automatically serialized/deserialized, so this representation is transparent to -the end-user. -""" -from collections import namedtuple -import cPickle import struct +import cPickle -Serializer = namedtuple("Serializer", ["dumps","loads"]) +def dump_pickle(obj): + return cPickle.dumps(obj, 2) -PickleSerializer = Serializer( - lambda obj: cPickle.dumps(obj, -1), - cPickle.loads) +load_pickle = cPickle.loads -def dumps(obj, stream): - # TODO: determining the length of non-byte objects. +def write_with_length(obj, stream): stream.write(struct.pack("!i", len(obj))) stream.write(obj) -def loads(stream): +def read_with_length(stream): length = stream.read(4) if length == "": raise EOFError diff --git a/pyspark/pyspark/worker.py b/pyspark/pyspark/worker.py index 0f90c6ff46..a9ed71892f 100644 --- a/pyspark/pyspark/worker.py +++ b/pyspark/pyspark/worker.py @@ -7,61 +7,41 @@ from base64 import standard_b64decode # copy_reg module. from pyspark.broadcast import Broadcast, _broadcastRegistry from pyspark.cloudpickle import CloudPickler -from pyspark.serializers import dumps, loads, PickleSerializer -import cPickle +from pyspark.serializers import write_with_length, read_with_length, \ + dump_pickle, load_pickle + # Redirect stdout to stderr so that users must return values from functions. 
old_stdout = sys.stdout sys.stdout = sys.stderr -def load_function(): - return cPickle.loads(standard_b64decode(sys.stdin.readline().strip())) - - -def output(x): - dumps(x, old_stdout) +def load_obj(): + return load_pickle(standard_b64decode(sys.stdin.readline().strip())) def read_input(): try: while True: - yield cPickle.loads(loads(sys.stdin)) + yield load_pickle(read_with_length(sys.stdin)) except EOFError: return -def do_pipeline(): - f = load_function() - for obj in f(read_input()): - output(PickleSerializer.dumps(obj)) - - -def do_shuffle_map_step(): - hashFunc = load_function() - while True: - try: - pickled = loads(sys.stdin) - except EOFError: - return - key = cPickle.loads(pickled)[0] - output(str(hashFunc(key))) - output(pickled) - - def main(): num_broadcast_variables = int(sys.stdin.readline().strip()) for _ in range(num_broadcast_variables): uuid = sys.stdin.read(36) - value = loads(sys.stdin) - _broadcastRegistry[uuid] = Broadcast(uuid, cPickle.loads(value)) - command = sys.stdin.readline().strip() - if command == "pipeline": - do_pipeline() - elif command == "shuffle_map_step": - do_shuffle_map_step() + value = read_with_length(sys.stdin) + _broadcastRegistry[uuid] = Broadcast(uuid, load_pickle(value)) + func = load_obj() + bypassSerializer = load_obj() + if bypassSerializer: + dumps = lambda x: x else: - raise Exception("Unsupported command %s" % command) + dumps = dump_pickle + for obj in func(read_input()): + write_with_length(dumps(obj), old_stdout) if __name__ == '__main__': From bff6a46359131a8f9bc38b93149b22baa7c711cd Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 25 Aug 2012 18:00:25 -0700 Subject: [PATCH 013/291] Add pipe(), saveAsTextFile(), sc.union() to Python API. --- .../scala/spark/api/python/PythonRDD.scala | 10 +++++--- pyspark/pyspark/context.py | 14 ++++++----- pyspark/pyspark/rdd.py | 25 +++++++++++++++++-- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index b9091fd436..4d3bdb3963 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -9,6 +9,7 @@ import spark._ import api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} import broadcast.Broadcast import scala.collection +import java.nio.charset.Charset trait PythonRDDBase { def compute[T](split: Split, envVars: Map[String, String], @@ -238,9 +239,12 @@ private object Pickle { val MARK : Byte = '(' val APPENDS : Byte = 'e' } -class ExtractValue extends spark.api.java.function.Function[(Array[Byte], + +private class ExtractValue extends spark.api.java.function.Function[(Array[Byte], Array[Byte]), Array[Byte]] { - override def call(pair: (Array[Byte], Array[Byte])) : Array[Byte] = pair._2 - +} + +private class BytesToString extends spark.api.java.function.Function[Array[Byte], String] { + override def call(arr: Array[Byte]) : String = new String(arr, "UTF-8") } diff --git a/pyspark/pyspark/context.py b/pyspark/pyspark/context.py index b8490019e3..04932c93f2 100644 --- a/pyspark/pyspark/context.py +++ b/pyspark/pyspark/context.py @@ -7,6 +7,8 @@ from pyspark.java_gateway import launch_gateway from pyspark.serializers import dump_pickle, write_with_length from pyspark.rdd import RDD +from py4j.java_collections import ListConverter + class SparkContext(object): @@ -39,12 +41,6 @@ class SparkContext(object): self._jsc = None def parallelize(self, c, numSlices=None): - """ - >>> sc = SparkContext("local", 
"test") - >>> rdd = sc.parallelize([(1, 2), (3, 4)]) - >>> rdd.collect() - [(1, 2), (3, 4)] - """ numSlices = numSlices or self.defaultParallelism # Calling the Java parallelize() method with an ArrayList is too slow, # because it sends O(n) Py4J commands. As an alternative, serialized @@ -62,6 +58,12 @@ class SparkContext(object): jrdd = self._jsc.textFile(name, minSplits) return RDD(jrdd, self) + def union(self, rdds): + first = rdds[0]._jrdd + rest = [x._jrdd for x in rdds[1:]] + rest = ListConverter().convert(rest, self.gateway._gateway_client) + return RDD(self._jsc.union(first, rest), self) + def broadcast(self, value): jbroadcast = self._jsc.broadcast(bytearray(dump_pickle(value))) return Broadcast(jbroadcast.uuid().toString(), value, jbroadcast, diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 21e822ba9f..8477f6dd02 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -1,6 +1,9 @@ from base64 import standard_b64encode as b64enc from collections import Counter from itertools import chain, ifilter, imap +import shlex +from subprocess import Popen, PIPE +from threading import Thread from pyspark import cloudpickle from pyspark.serializers import dump_pickle, load_pickle @@ -118,7 +121,20 @@ class RDD(object): """ return self.map(lambda x: (f(x), x)).groupByKey(numSplits) - # TODO: pipe + def pipe(self, command, env={}): + """ + >>> sc.parallelize([1, 2, 3]).pipe('cat').collect() + ['1', '2', '3'] + """ + def func(iterator): + pipe = Popen(shlex.split(command), env=env, stdin=PIPE, stdout=PIPE) + def pipe_objs(out): + for obj in iterator: + out.write(str(obj).rstrip('\n') + '\n') + out.close() + Thread(target=pipe_objs, args=[pipe.stdin]).start() + return (x.rstrip('\n') for x in pipe.stdout) + return self.mapPartitions(func) def foreach(self, f): """ @@ -206,7 +222,12 @@ class RDD(object): """ return load_pickle(bytes(self.ctx.asPickle(self._jrdd.first()))) - # TODO: saveAsTextFile + def saveAsTextFile(self, path): + def func(iterator): + return (str(x).encode("utf-8") for x in iterator) + keyed = PipelinedRDD(self, func) + keyed._bypass_serializer = True + keyed._jrdd.map(self.ctx.jvm.BytesToString()).saveAsTextFile(path) # Pair functions From 414367850982c4f8fc5e63cc94caa422eb736db5 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 27 Aug 2012 00:13:19 -0700 Subject: [PATCH 014/291] Fix minor bugs in Python API examples. 
--- pyspark/pyspark/examples/pi.py | 2 +- pyspark/pyspark/examples/tc.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyspark/pyspark/examples/pi.py b/pyspark/pyspark/examples/pi.py index fe63d2c952..348bbc5dce 100644 --- a/pyspark/pyspark/examples/pi.py +++ b/pyspark/pyspark/examples/pi.py @@ -9,7 +9,7 @@ if __name__ == "__main__": print >> sys.stderr, \ "Usage: PythonPi []" exit(-1) - sc = SparkContext(sys.argv[1], "PythonKMeans") + sc = SparkContext(sys.argv[1], "PythonPi") slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2 n = 100000 * slices def f(_): diff --git a/pyspark/pyspark/examples/tc.py b/pyspark/pyspark/examples/tc.py index 2796fdc6ad..9630e72b47 100644 --- a/pyspark/pyspark/examples/tc.py +++ b/pyspark/pyspark/examples/tc.py @@ -22,9 +22,9 @@ if __name__ == "__main__": print >> sys.stderr, \ "Usage: PythonTC []" exit(-1) - sc = SparkContext(sys.argv[1], "PythonKMeans") + sc = SparkContext(sys.argv[1], "PythonTC") slices = sys.argv[2] if len(sys.argv) > 2 else 2 - tc = sc.parallelizePairs(generateGraph(), slices).cache() + tc = sc.parallelize(generateGraph(), slices).cache() # Linear transitive closure: each round grows paths by one edge, # by joining the graph's edges with the already-discovered paths. @@ -32,7 +32,7 @@ if __name__ == "__main__": # the graph to obtain the path (x, z). # Because join() joins on keys, the edges are stored in reversed order. - edges = tc.mapPairs(lambda (x, y): (y, x)) + edges = tc.map(lambda (x, y): (y, x)) oldCount = 0L nextCount = tc.count() @@ -40,7 +40,7 @@ if __name__ == "__main__": oldCount = nextCount # Perform the join, obtaining an RDD of (y, (z, x)) pairs, # then project the result to obtain the new (x, z) paths. - new_edges = tc.join(edges).mapPairs(lambda (_, (a, b)): (b, a)) + new_edges = tc.join(edges).map(lambda (_, (a, b)): (b, a)) tc = tc.union(new_edges).distinct().cache() nextCount = tc.count() if nextCount == oldCount: From 9abdfa663360252d2edb346e6b3df4ff94ce78d7 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 17 Sep 2012 00:08:50 -0700 Subject: [PATCH 015/291] Fix Python 2.6 compatibility in Python API. 
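The tc.py fix above switches the projection to map() and keeps a lambda with Python 2 tuple-parameter unpacking. As a quick sanity check of what that projection computes, with made-up values: joining reversed edges (y, x) with paths (y, z) yields (y, (z, x)), which projects to the new path (x, z).

    # Edge 1 -> 2 (stored reversed as (2, 1)) joined with path 2 -> 3 gives (2, (3, 1)).
    joined = [(2, (3, 1))]
    new_edges = map(lambda (_, (a, b)): (b, a), joined)
    print new_edges   # [(1, 3)], i.e. the new path 1 -> 3
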
--- pyspark/pyspark/rdd.py | 17 +++++++++++------ python/tc.py | 22 ---------------------- 2 files changed, 11 insertions(+), 28 deletions(-) delete mode 100644 python/tc.py diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 8477f6dd02..e2137fe06c 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -1,5 +1,5 @@ from base64 import standard_b64encode as b64enc -from collections import Counter +from collections import defaultdict from itertools import chain, ifilter, imap import shlex from subprocess import Popen, PIPE @@ -198,13 +198,18 @@ class RDD(object): def countByValue(self): """ - >>> sc.parallelize([1, 2, 1, 2, 2]).countByValue().most_common() - [(2, 3), (1, 2)] + >>> sorted(sc.parallelize([1, 2, 1, 2, 2], 2).countByValue().items()) + [(1, 2), (2, 3)] """ def countPartition(iterator): - yield Counter(iterator) + counts = defaultdict(int) + for obj in iterator: + counts[obj] += 1 + yield counts def mergeMaps(m1, m2): - return m1 + m2 + for (k, v) in m2.iteritems(): + m1[k] += v + return m1 return self.mapPartitions(countPartition).reduce(mergeMaps) def take(self, num): @@ -271,7 +276,7 @@ class RDD(object): def countByKey(self): """ >>> rdd = sc.parallelize([("a", 1), ("b", 1), ("a", 1)]) - >>> rdd.countByKey().most_common() + >>> sorted(rdd.countByKey().items()) [('a', 2), ('b', 1)] """ return self.map(lambda x: x[0]).countByValue() diff --git a/python/tc.py b/python/tc.py deleted file mode 100644 index 5dcc4317e0..0000000000 --- a/python/tc.py +++ /dev/null @@ -1,22 +0,0 @@ -from rdd import SparkContext - -sc = SparkContext("local", "PythonWordCount") -e = [(1, 2), (2, 3), (4, 1)] - -tc = sc.parallelizePairs(e) - -edges = tc.mapPairs(lambda (x, y): (y, x)) - -oldCount = 0 -nextCount = tc.count() - -def project(x): - return (x[1][1], x[1][0]) - -while nextCount != oldCount: - oldCount = nextCount - tc = tc.union(tc.join(edges).mapPairs(project)).distinct() - nextCount = tc.count() - -print "TC has %i edges" % tc.count() -print tc.collect() From 52989c8a2c8c10d7f5610c033f6782e58fd3abc2 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 19 Oct 2012 10:24:49 -0700 Subject: [PATCH 016/291] Update Python API for v0.6.0 compatibility. 
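The compatibility fix above drops collections.Counter (added in Python 2.7) in favor of defaultdict(int), which also exists on 2.6. The replacement counting logic, extracted into a standalone sketch with the same merge semantics:

    from collections import defaultdict

    def count_partition(iterator):
        counts = defaultdict(int)
        for obj in iterator:
            counts[obj] += 1
        return counts

    def merge_maps(m1, m2):
        for (k, v) in m2.iteritems():
            m1[k] += v
        return m1

    merged = merge_maps(count_partition(iter([1, 2, 1])), count_partition(iter([2, 2])))
    print sorted(merged.items())   # [(1, 2), (2, 3)]
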
--- .../scala/spark/api/python/PythonRDD.scala | 18 +++++++++++------- .../main/scala/spark/broadcast/Broadcast.scala | 2 +- pyspark/pyspark/broadcast.py | 18 +++++++++--------- pyspark/pyspark/context.py | 2 +- pyspark/pyspark/java_gateway.py | 3 ++- pyspark/pyspark/serializers.py | 18 ++++++++++++++---- pyspark/pyspark/worker.py | 8 ++++---- 7 files changed, 42 insertions(+), 27 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 4d3bdb3963..528885fe5c 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -5,11 +5,15 @@ import java.io._ import scala.collection.Map import scala.collection.JavaConversions._ import scala.io.Source -import spark._ -import api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} -import broadcast.Broadcast -import scala.collection -import java.nio.charset.Charset + +import spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} +import spark.broadcast.Broadcast +import spark.SparkEnv +import spark.Split +import spark.RDD +import spark.OneToOneDependency +import spark.rdd.PipedRDD + trait PythonRDDBase { def compute[T](split: Split, envVars: Map[String, String], @@ -43,9 +47,9 @@ trait PythonRDDBase { SparkEnv.set(env) val out = new PrintWriter(proc.getOutputStream) val dOut = new DataOutputStream(proc.getOutputStream) - out.println(broadcastVars.length) + dOut.writeInt(broadcastVars.length) for (broadcast <- broadcastVars) { - out.print(broadcast.uuid.toString) + dOut.writeLong(broadcast.id) dOut.writeInt(broadcast.value.length) dOut.write(broadcast.value) dOut.flush() diff --git a/core/src/main/scala/spark/broadcast/Broadcast.scala b/core/src/main/scala/spark/broadcast/Broadcast.scala index 6055bfd045..2ffe7f741d 100644 --- a/core/src/main/scala/spark/broadcast/Broadcast.scala +++ b/core/src/main/scala/spark/broadcast/Broadcast.scala @@ -5,7 +5,7 @@ import java.util.concurrent.atomic.AtomicLong import spark._ -abstract class Broadcast[T](id: Long) extends Serializable { +abstract class Broadcast[T](private[spark] val id: Long) extends Serializable { def value: T // We cannot have an abstract readObject here due to some weird issues with diff --git a/pyspark/pyspark/broadcast.py b/pyspark/pyspark/broadcast.py index 1ea17d59af..4cff02b36d 100644 --- a/pyspark/pyspark/broadcast.py +++ b/pyspark/pyspark/broadcast.py @@ -6,7 +6,7 @@ [1, 2, 3, 4, 5] >>> from pyspark.broadcast import _broadcastRegistry ->>> _broadcastRegistry[b.uuid] = b +>>> _broadcastRegistry[b.bid] = b >>> from cPickle import dumps, loads >>> loads(dumps(b)).value [1, 2, 3, 4, 5] @@ -14,27 +14,27 @@ >>> sc.parallelize([0, 0]).flatMap(lambda x: b.value).collect() [1, 2, 3, 4, 5, 1, 2, 3, 4, 5] """ -# Holds broadcasted data received from Java, keyed by UUID. +# Holds broadcasted data received from Java, keyed by its id. _broadcastRegistry = {} -def _from_uuid(uuid): +def _from_id(bid): from pyspark.broadcast import _broadcastRegistry - if uuid not in _broadcastRegistry: - raise Exception("Broadcast variable '%s' not loaded!" % uuid) - return _broadcastRegistry[uuid] + if bid not in _broadcastRegistry: + raise Exception("Broadcast variable '%s' not loaded!" 
% bid) + return _broadcastRegistry[bid] class Broadcast(object): - def __init__(self, uuid, value, java_broadcast=None, pickle_registry=None): + def __init__(self, bid, value, java_broadcast=None, pickle_registry=None): self.value = value - self.uuid = uuid + self.bid = bid self._jbroadcast = java_broadcast self._pickle_registry = pickle_registry def __reduce__(self): self._pickle_registry.add(self) - return (_from_uuid, (self.uuid, )) + return (_from_id, (self.bid, )) def _test(): diff --git a/pyspark/pyspark/context.py b/pyspark/pyspark/context.py index 04932c93f2..3f4db26644 100644 --- a/pyspark/pyspark/context.py +++ b/pyspark/pyspark/context.py @@ -66,5 +66,5 @@ class SparkContext(object): def broadcast(self, value): jbroadcast = self._jsc.broadcast(bytearray(dump_pickle(value))) - return Broadcast(jbroadcast.uuid().toString(), value, jbroadcast, + return Broadcast(jbroadcast.id(), value, jbroadcast, self._pickled_broadcast_vars) diff --git a/pyspark/pyspark/java_gateway.py b/pyspark/pyspark/java_gateway.py index bcb405ba72..3726bcbf17 100644 --- a/pyspark/pyspark/java_gateway.py +++ b/pyspark/pyspark/java_gateway.py @@ -7,7 +7,8 @@ SPARK_HOME = os.environ["SPARK_HOME"] assembly_jar = glob.glob(os.path.join(SPARK_HOME, "core/target") + \ - "/spark-core-assembly-*-SNAPSHOT.jar")[0] + "/spark-core-assembly-*.jar")[0] + # TODO: what if multiple assembly jars are found? def launch_gateway(): diff --git a/pyspark/pyspark/serializers.py b/pyspark/pyspark/serializers.py index faa1e683c7..21ef8b106c 100644 --- a/pyspark/pyspark/serializers.py +++ b/pyspark/pyspark/serializers.py @@ -9,16 +9,26 @@ def dump_pickle(obj): load_pickle = cPickle.loads +def read_long(stream): + length = stream.read(8) + if length == "": + raise EOFError + return struct.unpack("!q", length)[0] + + +def read_int(stream): + length = stream.read(4) + if length == "": + raise EOFError + return struct.unpack("!i", length)[0] + def write_with_length(obj, stream): stream.write(struct.pack("!i", len(obj))) stream.write(obj) def read_with_length(stream): - length = stream.read(4) - if length == "": - raise EOFError - length = struct.unpack("!i", length)[0] + length = read_int(stream) obj = stream.read(length) if obj == "": raise EOFError diff --git a/pyspark/pyspark/worker.py b/pyspark/pyspark/worker.py index a9ed71892f..62824a1c9b 100644 --- a/pyspark/pyspark/worker.py +++ b/pyspark/pyspark/worker.py @@ -8,7 +8,7 @@ from base64 import standard_b64decode from pyspark.broadcast import Broadcast, _broadcastRegistry from pyspark.cloudpickle import CloudPickler from pyspark.serializers import write_with_length, read_with_length, \ - dump_pickle, load_pickle + read_long, read_int, dump_pickle, load_pickle # Redirect stdout to stderr so that users must return values from functions. @@ -29,11 +29,11 @@ def read_input(): def main(): - num_broadcast_variables = int(sys.stdin.readline().strip()) + num_broadcast_variables = read_int(sys.stdin) for _ in range(num_broadcast_variables): - uuid = sys.stdin.read(36) + bid = read_long(sys.stdin) value = read_with_length(sys.stdin) - _broadcastRegistry[uuid] = Broadcast(uuid, load_pickle(value)) + _broadcastRegistry[bid] = Broadcast(bid, load_pickle(value)) func = load_obj() bypassSerializer = load_obj() if bypassSerializer: From c23bf1aff4b9a1faf9d32c7b64acad2213f9515c Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 20 Oct 2012 00:16:41 +0000 Subject: [PATCH 017/291] Add PySpark README and run scripts. 
--- core/src/main/scala/spark/SparkContext.scala | 2 +- pyspark/README | 58 ++++++++++++++++++++ pyspark/pyspark-shell | 3 + pyspark/pyspark/context.py | 5 +- pyspark/pyspark/examples/wordcount.py | 17 ++++++ pyspark/pyspark/shell.py | 21 +++++++ pyspark/run-pyspark | 23 ++++++++ 7 files changed, 125 insertions(+), 4 deletions(-) create mode 100644 pyspark/README create mode 100755 pyspark/pyspark-shell create mode 100644 pyspark/pyspark/examples/wordcount.py create mode 100644 pyspark/pyspark/shell.py create mode 100755 pyspark/run-pyspark diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index becf737597..acb38ae33d 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -113,7 +113,7 @@ class SparkContext( // Environment variables to pass to our executors private[spark] val executorEnvs = HashMap[String, String]() for (key <- Seq("SPARK_MEM", "SPARK_CLASSPATH", "SPARK_LIBRARY_PATH", "SPARK_JAVA_OPTS", - "SPARK_TESTING")) { + "SPARK_TESTING", "PYTHONPATH")) { val value = System.getenv(key) if (value != null) { executorEnvs(key) = value diff --git a/pyspark/README b/pyspark/README new file mode 100644 index 0000000000..63a1def141 --- /dev/null +++ b/pyspark/README @@ -0,0 +1,58 @@ +# PySpark + +PySpark is a Python API for Spark. + +PySpark jobs are writen in Python and executed using a standard Python +interpreter; this supports modules that use Python C extensions. The +API is based on the Spark Scala API and uses regular Python functions +and lambdas to support user-defined functions. PySpark supports +interactive use through a standard Python interpreter; it can +automatically serialize closures and ship them to worker processes. + +PySpark is built on top of the Spark Java API. Data is uniformly +represented as serialized Python objects and stored in Spark Java +processes, which communicate with PySpark worker processes over pipes. + +## Features + +PySpark supports most of the Spark API, including broadcast variables. +RDDs are dynamically typed and can hold any Python object. + +PySpark does not support: + +- Special functions on RDDs of doubles +- Accumulators + +## Examples and Documentation + +The PySpark source contains docstrings and doctests that document its +API. The public classes are in `context.py` and `rdd.py`. + +The `pyspark/pyspark/examples` directory contains a few complete +examples. + +## Installing PySpark + +PySpark requires a development version of Py4J, a Python library for +interacting with Java processes. It can be installed from +https://github.com/bartdag/py4j; make sure to install a version that +contains at least the commits through 3dbf380d3d. + +PySpark uses the `PYTHONPATH` environment variable to search for Python +classes; Py4J should be on this path, along with any libraries used by +PySpark programs. `PYTHONPATH` will be automatically shipped to worker +machines, but the files that it points to must be present on each +machine. + +PySpark requires the Spark assembly JAR, which can be created by running +`sbt/sbt assembly` in the Spark directory. + +Additionally, `SPARK_HOME` should be set to the location of the Spark +package. + +## Running PySpark + +The easiest way to run PySpark is to use the `run-pyspark` and +`pyspark-shell` scripts, which are included in the `pyspark` directory. +These scripts automatically load the `spark-conf.sh` file, set +`SPARK_HOME`, and add the `pyspark` package to the `PYTHONPATH`. 
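As a quick illustration of the API the README describes, here is a small job that exercises RDD transformations and a broadcast variable. This is a hedged sketch, not taken from the patch; it assumes the setup above (Py4J and the pyspark package on PYTHONPATH, SPARK_HOME set, assembly JAR built) and uses only operations present in rdd.py at this point:

    from pyspark.context import SparkContext

    if __name__ == "__main__":
        sc = SparkContext("local", "ReadmeExample")
        # Ship a small lookup table to the workers once.
        lookup = sc.broadcast({"a": 1, "b": 2})
        data = sc.parallelize(["a", "b", "a"])
        counts = data.map(lambda k: (k, lookup.value[k])) \
                     .reduceByKey(lambda x, y: x + y) \
                     .collect()
        for (key, total) in sorted(counts):
            print "%s : %i" % (key, total)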
diff --git a/pyspark/pyspark-shell b/pyspark/pyspark-shell new file mode 100755 index 0000000000..4ed3e6010c --- /dev/null +++ b/pyspark/pyspark-shell @@ -0,0 +1,3 @@ +#!/bin/sh +FWDIR="`dirname $0`" +exec $FWDIR/run-pyspark $FWDIR/pyspark/shell.py "$@" diff --git a/pyspark/pyspark/context.py b/pyspark/pyspark/context.py index 3f4db26644..50d57e5317 100644 --- a/pyspark/pyspark/context.py +++ b/pyspark/pyspark/context.py @@ -18,14 +18,13 @@ class SparkContext(object): asPickle = jvm.spark.api.python.PythonRDD.asPickle arrayAsPickle = jvm.spark.api.python.PythonRDD.arrayAsPickle - def __init__(self, master, name, defaultParallelism=None, - pythonExec='python'): + def __init__(self, master, name, defaultParallelism=None): self.master = master self.name = name self._jsc = self.jvm.JavaSparkContext(master, name) self.defaultParallelism = \ defaultParallelism or self._jsc.sc().defaultParallelism() - self.pythonExec = pythonExec + self.pythonExec = os.environ.get("PYSPARK_PYTHON_EXEC", 'python') # Broadcast's __reduce__ method stores Broadcast instances here. # This allows other code to determine which Broadcast instances have # been pickled, so it can determine which Java broadcast objects to diff --git a/pyspark/pyspark/examples/wordcount.py b/pyspark/pyspark/examples/wordcount.py new file mode 100644 index 0000000000..8365c070e8 --- /dev/null +++ b/pyspark/pyspark/examples/wordcount.py @@ -0,0 +1,17 @@ +import sys +from operator import add +from pyspark.context import SparkContext + +if __name__ == "__main__": + if len(sys.argv) < 3: + print >> sys.stderr, \ + "Usage: PythonWordCount " + exit(-1) + sc = SparkContext(sys.argv[1], "PythonWordCount") + lines = sc.textFile(sys.argv[2], 1) + counts = lines.flatMap(lambda x: x.split(' ')) \ + .map(lambda x: (x, 1)) \ + .reduceByKey(add) + output = counts.collect() + for (word, count) in output: + print "%s : %i" % (word, count) diff --git a/pyspark/pyspark/shell.py b/pyspark/pyspark/shell.py new file mode 100644 index 0000000000..7ef30894cb --- /dev/null +++ b/pyspark/pyspark/shell.py @@ -0,0 +1,21 @@ +""" +An interactive shell. +""" +import code +import sys + +from pyspark.context import SparkContext + + +def main(master='local'): + sc = SparkContext(master, 'PySparkShell') + print "Spark context available as sc." + code.interact(local={'sc': sc}) + + +if __name__ == '__main__': + if len(sys.argv) > 1: + master = sys.argv[1] + else: + master = 'local' + main(master) diff --git a/pyspark/run-pyspark b/pyspark/run-pyspark new file mode 100755 index 0000000000..9c5e027962 --- /dev/null +++ b/pyspark/run-pyspark @@ -0,0 +1,23 @@ +#!/bin/bash + +# Figure out where the Scala framework is installed +FWDIR="$(cd `dirname $0`; cd ../; pwd)" + +# Export this as SPARK_HOME +export SPARK_HOME="$FWDIR" + +# Load environment variables from conf/spark-env.sh, if it exists +if [ -e $FWDIR/conf/spark-env.sh ] ; then + . $FWDIR/conf/spark-env.sh +fi + +# Figure out which Python executable to use +if [ -z "$PYSPARK_PYTHON" ] ; then + PYSPARK_PYTHON="python" +fi +export PYSPARK_PYTHON + +# Add the PySpark classes to the Python path: +export PYTHONPATH=$SPARK_HOME/pyspark/:$PYTHONPATH + +exec "$PYSPARK_PYTHON" "$@" From d4f2e5b0ef38db9d42bb0d5fbbbe6103ce047efe Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 22 Oct 2012 10:28:59 -0700 Subject: [PATCH 018/291] Remove PYTHONPATH from SparkContext's executorEnvs. It makes more sense to pass it in the dictionary of environment variables that is used to construct PythonRDD. 
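The diff below removes PYTHONPATH from the executor environment and instead ships it per-RDD in the envVars map handed to PythonRDD, which merges it into the worker subprocess's environment. A conceptual, Spark-free sketch of that forwarding, using a plain subprocess in place of the Py4J-backed PythonRDD (the script and variable names here are illustrative):

    import os
    import subprocess

    if __name__ == "__main__":
        # The driver-side RDD collects the environment it wants its workers to see
        # (here just PYTHONPATH, as in the rdd.py hunk below)...
        env_vars = {"PYTHONPATH": os.environ.get("PYTHONPATH", "")}
        # ...and the launcher merges it into the child's environment, which is what
        # PythonRDDBase.compute does on the Scala side via ProcessBuilder.
        child_env = dict(os.environ)
        child_env.update(env_vars)
        proc = subprocess.Popen(
            ["python", "-c", "import os; print os.environ.get('PYTHONPATH', '')"],
            stdout=subprocess.PIPE, env=child_env)
        print proc.communicate()[0].strip()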
--- core/src/main/scala/spark/SparkContext.scala | 2 +- .../main/scala/spark/api/python/PythonRDD.scala | 15 +++++++-------- pyspark/pyspark/rdd.py | 8 ++++++-- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index acb38ae33d..becf737597 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -113,7 +113,7 @@ class SparkContext( // Environment variables to pass to our executors private[spark] val executorEnvs = HashMap[String, String]() for (key <- Seq("SPARK_MEM", "SPARK_CLASSPATH", "SPARK_LIBRARY_PATH", "SPARK_JAVA_OPTS", - "SPARK_TESTING", "PYTHONPATH")) { + "SPARK_TESTING")) { val value = System.getenv(key) if (value != null) { executorEnvs(key) = value diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 528885fe5c..a593e53efd 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -131,18 +131,17 @@ trait PythonRDDBase { } class PythonRDD[T: ClassManifest]( - parent: RDD[T], command: Seq[String], envVars: Map[String, String], + parent: RDD[T], command: Seq[String], envVars: java.util.Map[String, String], preservePartitoning: Boolean, pythonExec: String, broadcastVars: java.util.List[Broadcast[Array[Byte]]]) extends RDD[Array[Byte]](parent.context) with PythonRDDBase { - def this(parent: RDD[T], command: Seq[String], preservePartitoning: Boolean, - pythonExec: String, broadcastVars: java.util.List[Broadcast[Array[Byte]]]) = - this(parent, command, Map(), preservePartitoning, pythonExec, broadcastVars) - // Similar to Runtime.exec(), if we are given a single string, split it into words // using a standard StringTokenizer (i.e. 
by spaces) - def this(parent: RDD[T], command: String, preservePartitoning: Boolean, pythonExec: String, broadcastVars: java.util.List[Broadcast[Array[Byte]]]) = - this(parent, PipedRDD.tokenize(command), preservePartitoning, pythonExec, broadcastVars) + def this(parent: RDD[T], command: String, envVars: java.util.Map[String, String], + preservePartitoning: Boolean, pythonExec: String, + broadcastVars: java.util.List[Broadcast[Array[Byte]]]) = + this(parent, PipedRDD.tokenize(command), envVars, preservePartitoning, pythonExec, + broadcastVars) override def splits = parent.splits @@ -151,7 +150,7 @@ class PythonRDD[T: ClassManifest]( override val partitioner = if (preservePartitoning) parent.partitioner else None override def compute(split: Split): Iterator[Array[Byte]] = - compute(split, envVars, command, parent, pythonExec, broadcastVars) + compute(split, envVars.toMap, command, parent, pythonExec, broadcastVars) val asJavaRDD : JavaRDD[Array[Byte]] = JavaRDD.fromRDD(this) } diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index e2137fe06c..e4878c08ba 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -1,6 +1,7 @@ from base64 import standard_b64encode as b64enc from collections import defaultdict from itertools import chain, ifilter, imap +import os import shlex from subprocess import Popen, PIPE from threading import Thread @@ -10,7 +11,7 @@ from pyspark.serializers import dump_pickle, load_pickle from pyspark.join import python_join, python_left_outer_join, \ python_right_outer_join, python_cogroup -from py4j.java_collections import ListConverter +from py4j.java_collections import ListConverter, MapConverter class RDD(object): @@ -447,8 +448,11 @@ class PipelinedRDD(RDD): self.ctx.gateway._gateway_client) self.ctx._pickled_broadcast_vars.clear() class_manifest = self._prev_jrdd.classManifest() + env = MapConverter().convert( + {'PYTHONPATH' : os.environ.get("PYTHONPATH", "")}, + self.ctx.gateway._gateway_client) python_rdd = self.ctx.jvm.PythonRDD(self._prev_jrdd.rdd(), - pipe_command, self.preservesPartitioning, self.ctx.pythonExec, + pipe_command, env, self.preservesPartitioning, self.ctx.pythonExec, broadcast_vars, class_manifest) self._jrdd_val = python_rdd.asJavaRDD() return self._jrdd_val From 7859879aaa1860ff6b383e32a18fd9a410a97416 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sun, 28 Oct 2012 16:46:31 -0700 Subject: [PATCH 019/291] Bump required Py4J version and add test for large broadcast variables. --- pyspark/README | 2 +- pyspark/pyspark/broadcast.py | 2 ++ pyspark/requirements.txt | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pyspark/README b/pyspark/README index 63a1def141..55490e1a83 100644 --- a/pyspark/README +++ b/pyspark/README @@ -36,7 +36,7 @@ examples. PySpark requires a development version of Py4J, a Python library for interacting with Java processes. It can be installed from https://github.com/bartdag/py4j; make sure to install a version that -contains at least the commits through 3dbf380d3d. +contains at least the commits through b7924aabe9. 
PySpark uses the `PYTHONPATH` environment variable to search for Python classes; Py4J should be on this path, along with any libraries used by diff --git a/pyspark/pyspark/broadcast.py b/pyspark/pyspark/broadcast.py index 4cff02b36d..93876fa738 100644 --- a/pyspark/pyspark/broadcast.py +++ b/pyspark/pyspark/broadcast.py @@ -13,6 +13,8 @@ >>> sc.parallelize([0, 0]).flatMap(lambda x: b.value).collect() [1, 2, 3, 4, 5, 1, 2, 3, 4, 5] + +>>> large_broadcast = sc.broadcast(list(range(10000))) """ # Holds broadcasted data received from Java, keyed by its id. _broadcastRegistry = {} diff --git a/pyspark/requirements.txt b/pyspark/requirements.txt index 71e2bc2b89..48fa2ab105 100644 --- a/pyspark/requirements.txt +++ b/pyspark/requirements.txt @@ -3,4 +3,4 @@ # package is not at the root of the git repository. It may be possible to # install Py4J from git once https://github.com/pypa/pip/pull/526 is merged. -# git+git://github.com/bartdag/py4j.git@3dbf380d3d2cdeb9aab394454ea74d80c4aba1ea +# git+git://github.com/bartdag/py4j.git@b7924aabe9c5e63f0a4d8bbd17019534c7ec014e From 2ccf3b665280bf5b0919e3801d028126cb070dbd Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sun, 28 Oct 2012 22:30:28 -0700 Subject: [PATCH 020/291] Fix PySpark hash partitioning bug. A Java array's hashCode is based on its object identity, not its elements, so this was causing serialized keys to be hashed incorrectly. This commit adds a PySpark-specific workaround and adds more tests. --- .../spark/api/python/PythonPartitioner.scala | 41 +++++++++++++++++++ .../scala/spark/api/python/PythonRDD.scala | 10 ++--- pyspark/pyspark/rdd.py | 12 ++++-- 3 files changed, 54 insertions(+), 9 deletions(-) create mode 100644 core/src/main/scala/spark/api/python/PythonPartitioner.scala diff --git a/core/src/main/scala/spark/api/python/PythonPartitioner.scala b/core/src/main/scala/spark/api/python/PythonPartitioner.scala new file mode 100644 index 0000000000..ef9f808fb2 --- /dev/null +++ b/core/src/main/scala/spark/api/python/PythonPartitioner.scala @@ -0,0 +1,41 @@ +package spark.api.python + +import spark.Partitioner + +import java.util.Arrays + +/** + * A [[spark.Partitioner]] that performs handling of byte arrays, for use by the Python API. + */ +class PythonPartitioner(override val numPartitions: Int) extends Partitioner { + + override def getPartition(key: Any): Int = { + if (key == null) { + return 0 + } + else { + val hashCode = { + if (key.isInstanceOf[Array[Byte]]) { + System.err.println("Dumping a byte array!"
+ Arrays.hashCode(key.asInstanceOf[Array[Byte]]) + ) + Arrays.hashCode(key.asInstanceOf[Array[Byte]]) + } + else + key.hashCode() + } + val mod = hashCode % numPartitions + if (mod < 0) { + mod + numPartitions + } else { + mod // Guard against negative hash codes + } + } + } + + override def equals(other: Any): Boolean = other match { + case h: PythonPartitioner => + h.numPartitions == numPartitions + case _ => + false + } +} diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index a593e53efd..50094d6b0f 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -179,14 +179,12 @@ object PythonRDD { val dOut = new DataOutputStream(baos); if (elem.isInstanceOf[Array[Byte]]) { elem.asInstanceOf[Array[Byte]] - } else if (elem.isInstanceOf[scala.Tuple2[_, _]]) { - val t = elem.asInstanceOf[scala.Tuple2[_, _]] - val t1 = t._1.asInstanceOf[Array[Byte]] - val t2 = t._2.asInstanceOf[Array[Byte]] + } else if (elem.isInstanceOf[scala.Tuple2[Array[Byte], Array[Byte]]]) { + val t = elem.asInstanceOf[scala.Tuple2[Array[Byte], Array[Byte]]] dOut.writeByte(Pickle.PROTO) dOut.writeByte(Pickle.TWO) - dOut.write(PythonRDD.stripPickle(t1)) - dOut.write(PythonRDD.stripPickle(t2)) + dOut.write(PythonRDD.stripPickle(t._1)) + dOut.write(PythonRDD.stripPickle(t._2)) dOut.writeByte(Pickle.TUPLE2) dOut.writeByte(Pickle.STOP) baos.toByteArray() diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index e4878c08ba..85a24c6854 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -310,6 +310,12 @@ class RDD(object): return python_right_outer_join(self, other, numSplits) def partitionBy(self, numSplits, hashFunc=hash): + """ + >>> pairs = sc.parallelize([1, 2, 3, 4, 2, 4, 1]).map(lambda x: (x, x)) + >>> sets = pairs.partitionBy(2).glom().collect() + >>> set(sets[0]).intersection(set(sets[1])) + set([]) + """ if numSplits is None: numSplits = self.ctx.defaultParallelism def add_shuffle_key(iterator): @@ -319,7 +325,7 @@ class RDD(object): keyed = PipelinedRDD(self, add_shuffle_key) keyed._bypass_serializer = True pairRDD = self.ctx.jvm.PairwiseRDD(keyed._jrdd.rdd()).asJavaPairRDD() - partitioner = self.ctx.jvm.spark.HashPartitioner(numSplits) + partitioner = self.ctx.jvm.spark.api.python.PythonPartitioner(numSplits) jrdd = pairRDD.partitionBy(partitioner) jrdd = jrdd.map(self.ctx.jvm.ExtractValue()) return RDD(jrdd, self.ctx) @@ -391,7 +397,7 @@ class RDD(object): """ >>> x = sc.parallelize([("a", 1), ("b", 4)]) >>> y = sc.parallelize([("a", 2)]) - >>> x.cogroup(y).collect() + >>> sorted(x.cogroup(y).collect()) [('a', ([1], [2])), ('b', ([4], []))] """ return python_cogroup(self, other, numSplits) @@ -462,7 +468,7 @@ def _test(): import doctest from pyspark.context import SparkContext globs = globals().copy() - globs['sc'] = SparkContext('local', 'PythonTest') + globs['sc'] = SparkContext('local[4]', 'PythonTest') doctest.testmod(globs=globs) globs['sc'].stop() From 531ac136bf4ed333cb906ac229d986605a8207a6 Mon Sep 17 00:00:00 2001 From: Denny Date: Mon, 29 Oct 2012 14:53:47 -0700 Subject: [PATCH 021/291] BlockManager UI. 
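Before the BlockManager UI changes that follow, a short illustration of the partitioning behaviour fixed above: because keys reach the JVM as pickled byte arrays, they must be hashed by contents rather than by object identity, so after partitionBy() every copy of a key lands in the same partition. A hedged usage sketch mirroring the doctest added in the partitioning patch (requires a running SparkContext):

    from pyspark.context import SparkContext

    if __name__ == "__main__":
        sc = SparkContext("local[4]", "PartitionByCheck")
        pairs = sc.parallelize([1, 2, 3, 4, 2, 4, 1]).map(lambda x: (x, x))
        # glom() gathers each partition into a list so placement can be inspected.
        sets = pairs.partitionBy(2).glom().collect()
        # With contents-based hashing of the serialized keys, no key spans partitions.
        print set(sets[0]).intersection(set(sets[1]))   # set([])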
--- core/src/main/scala/spark/RDD.scala | 8 ++ core/src/main/scala/spark/SparkContext.scala | 10 ++ .../spark/storage/BlockManagerMaster.scala | 33 +++++- .../scala/spark/storage/BlockManagerUI.scala | 102 ++++++++++++++++++ .../src/main/scala/spark/util/AkkaUtils.scala | 5 +- .../{deploy => }/common/layout.scala.html | 0 .../spark/deploy/master/index.scala.html | 2 +- .../deploy/master/job_details.scala.html | 2 +- .../spark/deploy/worker/index.scala.html | 2 +- .../main/twirl/spark/storage/index.scala.html | 28 +++++ .../main/twirl/spark/storage/rdd.scala.html | 65 +++++++++++ .../twirl/spark/storage/rdd_row.scala.html | 18 ++++ .../twirl/spark/storage/rdd_table.scala.html | 18 ++++ 13 files changed, 283 insertions(+), 10 deletions(-) create mode 100644 core/src/main/scala/spark/storage/BlockManagerUI.scala rename core/src/main/twirl/spark/{deploy => }/common/layout.scala.html (100%) create mode 100644 core/src/main/twirl/spark/storage/index.scala.html create mode 100644 core/src/main/twirl/spark/storage/rdd.scala.html create mode 100644 core/src/main/twirl/spark/storage/rdd_row.scala.html create mode 100644 core/src/main/twirl/spark/storage/rdd_table.scala.html diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 338dff4061..dc757dc6aa 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -107,6 +107,12 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial // Variables relating to persistence private var storageLevel: StorageLevel = StorageLevel.NONE + /* Assign a name to this RDD */ + def name(name: String) = { + sc.rddNames(this.id) = name + this + } + /** * Set this RDD's storage level to persist its values across operations after the first time * it is computed. Can only be called once on each RDD. 
@@ -118,6 +124,8 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial "Cannot change storage level of an RDD after it was already assigned a level") } storageLevel = newLevel + // Register the RDD with the SparkContext + sc.persistentRdds(id) = this this } diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index d26cccbfe1..71c9dcd017 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -1,6 +1,7 @@ package spark import java.io._ +import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicInteger import java.net.{URI, URLClassLoader} @@ -102,10 +103,19 @@ class SparkContext( isLocal) SparkEnv.set(env) + // Start the BlockManager UI + spark.storage.BlockManagerUI.start(SparkEnv.get.actorSystem, + SparkEnv.get.blockManager.master.masterActor, this) + // Used to store a URL for each static file/jar together with the file's local timestamp private[spark] val addedFiles = HashMap[String, Long]() private[spark] val addedJars = HashMap[String, Long]() + // Keeps track of all persisted RDDs + private[spark] val persistentRdds = new ConcurrentHashMap[Int, RDD[_]]() + // A HashMap for friendly RDD Names + private[spark] val rddNames = new ConcurrentHashMap[Int, String]() + // Add each JAR given through the constructor jars.foreach { addJar(_) } diff --git a/core/src/main/scala/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/spark/storage/BlockManagerMaster.scala index ace27e758c..d12a16869a 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMaster.scala @@ -3,7 +3,8 @@ package spark.storage import java.io._ import java.util.{HashMap => JHashMap} -import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} +import scala.collection.JavaConverters._ +import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map} import scala.util.Random import akka.actor._ @@ -90,6 +91,15 @@ case object StopBlockManagerMaster extends ToBlockManagerMaster private[spark] case object GetMemoryStatus extends ToBlockManagerMaster +private[spark] +case class GetStorageStatus extends ToBlockManagerMaster + +private[spark] +case class BlockStatus(storageLevel: StorageLevel, memSize: Long, diskSize: Long) + +private[spark] +case class StorageStatus(maxMem: Long, remainingMem: Long, blocks: Map[String, BlockStatus]) + private[spark] class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { @@ -99,7 +109,8 @@ private[spark] class BlockManagerMasterActor(val isLocal: Boolean) extends Actor val maxMem: Long) { private var _lastSeenMs = timeMs private var _remainingMem = maxMem - private val _blocks = new JHashMap[String, StorageLevel] + + private val _blocks = new JHashMap[String, BlockStatus] logInfo("Registering block manager %s:%d with %s RAM".format( blockManagerId.ip, blockManagerId.port, Utils.memoryBytesToString(maxMem))) @@ -115,7 +126,7 @@ private[spark] class BlockManagerMasterActor(val isLocal: Boolean) extends Actor if (_blocks.containsKey(blockId)) { // The block exists on the slave already. 
- val originalLevel: StorageLevel = _blocks.get(blockId) + val originalLevel: StorageLevel = _blocks.get(blockId).storageLevel if (originalLevel.useMemory) { _remainingMem += memSize @@ -124,7 +135,7 @@ private[spark] class BlockManagerMasterActor(val isLocal: Boolean) extends Actor if (storageLevel.isValid) { // isValid means it is either stored in-memory or on-disk. - _blocks.put(blockId, storageLevel) + _blocks.put(blockId, BlockStatus(storageLevel, memSize, diskSize)) if (storageLevel.useMemory) { _remainingMem -= memSize logInfo("Added %s in memory on %s:%d (size: %s, free: %s)".format( @@ -137,7 +148,7 @@ private[spark] class BlockManagerMasterActor(val isLocal: Boolean) extends Actor } } else if (_blocks.containsKey(blockId)) { // If isValid is not true, drop the block. - val originalLevel: StorageLevel = _blocks.get(blockId) + val originalLevel: StorageLevel = _blocks.get(blockId).storageLevel _blocks.remove(blockId) if (originalLevel.useMemory) { _remainingMem += memSize @@ -152,6 +163,8 @@ private[spark] class BlockManagerMasterActor(val isLocal: Boolean) extends Actor } } + def blocks: JHashMap[String, BlockStatus] = _blocks + def remainingMem: Long = _remainingMem def lastSeenMs: Long = _lastSeenMs @@ -198,6 +211,9 @@ private[spark] class BlockManagerMasterActor(val isLocal: Boolean) extends Actor case GetMemoryStatus => getMemoryStatus + case GetStorageStatus => + getStorageStatus + case RemoveHost(host) => removeHost(host) sender ! true @@ -219,6 +235,13 @@ private[spark] class BlockManagerMasterActor(val isLocal: Boolean) extends Actor sender ! res } + private def getStorageStatus() { + val res = blockManagerInfo.map { case(blockManagerId, info) => + StorageStatus(info.maxMem, info.remainingMem, info.blocks.asScala) + } + sender ! res + } + private def register(blockManagerId: BlockManagerId, maxMemSize: Long) { val startTimeMs = System.currentTimeMillis() val tmp = " " + blockManagerId + " " diff --git a/core/src/main/scala/spark/storage/BlockManagerUI.scala b/core/src/main/scala/spark/storage/BlockManagerUI.scala new file mode 100644 index 0000000000..c168f60c35 --- /dev/null +++ b/core/src/main/scala/spark/storage/BlockManagerUI.scala @@ -0,0 +1,102 @@ +package spark.storage + +import akka.actor.{ActorRef, ActorSystem} +import akka.dispatch.Await +import akka.pattern.ask +import akka.util.Timeout +import akka.util.duration._ +import cc.spray.Directives +import cc.spray.directives._ +import cc.spray.typeconversion.TwirlSupport._ +import scala.collection.mutable.ArrayBuffer +import spark.{Logging, SparkContext, SparkEnv} +import spark.util.AkkaUtils + +private[spark] +object BlockManagerUI extends Logging { + + /* Starts the Web interface for the BlockManager */ + def start(actorSystem : ActorSystem, masterActor: ActorRef, sc: SparkContext) { + val webUIDirectives = new BlockManagerUIDirectives(actorSystem, masterActor, sc) + try { + logInfo("Starting BlockManager WebUI.") + val port = Option(System.getenv("BLOCKMANAGER_UI_PORT")).getOrElse("9080").toInt + AkkaUtils.startSprayServer(actorSystem, "0.0.0.0", port, webUIDirectives.handler, "BlockManagerHTTPServer") + } catch { + case e: Exception => + logError("Failed to create BlockManager WebUI", e) + System.exit(1) + } + } + +} + +private[spark] +case class RDDInfo(id: Int, name: String, storageLevel: StorageLevel, numPartitions: Int, memSize: Long, diskSize: Long) + +private[spark] +class BlockManagerUIDirectives(val actorSystem: ActorSystem, master: ActorRef, sc: SparkContext) extends Directives { + + val 
STATIC_RESOURCE_DIR = "spark/deploy/static" + implicit val timeout = Timeout(1 seconds) + + val handler = { + + get { path("") { completeWith { + // Request the current storage status from the Master + val future = master ? GetStorageStatus + future.map { status => + val storageStati = status.asInstanceOf[ArrayBuffer[StorageStatus]] + + // Calculate macro-level statistics + val maxMem = storageStati.map(_.maxMem).reduce(_+_) + val remainingMem = storageStati.map(_.remainingMem).reduce(_+_) + val diskSpaceUsed = storageStati.flatMap(_.blocks.values.map(_.diskSize)) + .reduceOption(_+_).getOrElse(0L) + + // Filter out everything that's not and rdd. + val rddBlocks = storageStati.flatMap(_.blocks).filter { case(k,v) => k.startsWith("rdd") }.toMap + val rdds = rddInfoFromBlockStati(rddBlocks) + + spark.storage.html.index.render(maxMem, remainingMem, diskSpaceUsed, rdds.toList) + } + }}} ~ + get { path("rdd") { parameter("id") { id => { completeWith { + val future = master ? GetStorageStatus + future.map { status => + val prefix = "rdd_" + id.toString + + val storageStati = status.asInstanceOf[ArrayBuffer[StorageStatus]] + val rddBlocks = storageStati.flatMap(_.blocks).filter { case(k,v) => k.startsWith(prefix) }.toMap + val rddInfo = rddInfoFromBlockStati(rddBlocks).first + + spark.storage.html.rdd.render(rddInfo, rddBlocks) + + } + }}}}} ~ + pathPrefix("static") { + getFromResourceDirectory(STATIC_RESOURCE_DIR) + } + + } + + private def rddInfoFromBlockStati(infos: Map[String, BlockStatus]) : Array[RDDInfo] = { + infos.groupBy { case(k,v) => + // Group by rdd name, ignore the partition name + k.substring(0,k.lastIndexOf('_')) + }.map { case(k,v) => + val blockStati = v.map(_._2).toArray + // Add up memory and disk sizes + val tmp = blockStati.map { x => (x.memSize, x.diskSize)}.reduce { (x,y) => + (x._1 + y._1, x._2 + y._2) + } + // Get the friendly name for the rdd, if available. + // This is pretty hacky, is there a better way? + val rddId = k.split("_").last.toInt + val rddName : String = Option(sc.rddNames.get(rddId)).getOrElse(k) + val rddStorageLevel = sc.persistentRdds.get(rddId).getStorageLevel + RDDInfo(rddId, rddName, rddStorageLevel, blockStati.length, tmp._1, tmp._2) + }.toArray + } + +} diff --git a/core/src/main/scala/spark/util/AkkaUtils.scala b/core/src/main/scala/spark/util/AkkaUtils.scala index b466b5239c..13bc0f8ccc 100644 --- a/core/src/main/scala/spark/util/AkkaUtils.scala +++ b/core/src/main/scala/spark/util/AkkaUtils.scala @@ -50,12 +50,13 @@ private[spark] object AkkaUtils { * Creates a Spray HTTP server bound to a given IP and port with a given Spray Route object to * handle requests. Throws a SparkException if this fails. 
*/ - def startSprayServer(actorSystem: ActorSystem, ip: String, port: Int, route: Route) { + def startSprayServer(actorSystem: ActorSystem, ip: String, port: Int, route: Route, + name: String = "HttpServer") { val ioWorker = new IoWorker(actorSystem).start() val httpService = actorSystem.actorOf(Props(new HttpService(route))) val rootService = actorSystem.actorOf(Props(new SprayCanRootService(httpService))) val server = actorSystem.actorOf( - Props(new HttpServer(ioWorker, SingletonHandler(rootService))), name = "HttpServer") + Props(new HttpServer(ioWorker, SingletonHandler(rootService))), name = name) actorSystem.registerOnTermination { ioWorker.stop() } val timeout = 3.seconds val future = server.ask(HttpServer.Bind(ip, port))(timeout) diff --git a/core/src/main/twirl/spark/deploy/common/layout.scala.html b/core/src/main/twirl/spark/common/layout.scala.html similarity index 100% rename from core/src/main/twirl/spark/deploy/common/layout.scala.html rename to core/src/main/twirl/spark/common/layout.scala.html diff --git a/core/src/main/twirl/spark/deploy/master/index.scala.html b/core/src/main/twirl/spark/deploy/master/index.scala.html index 7562076b00..2e15fe2200 100644 --- a/core/src/main/twirl/spark/deploy/master/index.scala.html +++ b/core/src/main/twirl/spark/deploy/master/index.scala.html @@ -1,7 +1,7 @@ @(state: spark.deploy.MasterState) @import spark.deploy.master._ -@spark.deploy.common.html.layout(title = "Spark Master on " + state.uri) { +@spark.common.html.layout(title = "Spark Master on " + state.uri) {
diff --git a/core/src/main/twirl/spark/deploy/master/job_details.scala.html b/core/src/main/twirl/spark/deploy/master/job_details.scala.html index dcf41c28f2..d02a51b214 100644 --- a/core/src/main/twirl/spark/deploy/master/job_details.scala.html +++ b/core/src/main/twirl/spark/deploy/master/job_details.scala.html @@ -1,6 +1,6 @@ @(job: spark.deploy.master.JobInfo) -@spark.deploy.common.html.layout(title = "Job Details") { +@spark.common.html.layout(title = "Job Details") {
diff --git a/core/src/main/twirl/spark/deploy/worker/index.scala.html b/core/src/main/twirl/spark/deploy/worker/index.scala.html index 69746ed02c..40c2d81d77 100644 --- a/core/src/main/twirl/spark/deploy/worker/index.scala.html +++ b/core/src/main/twirl/spark/deploy/worker/index.scala.html @@ -1,6 +1,6 @@ @(worker: spark.deploy.WorkerState) -@spark.deploy.common.html.layout(title = "Spark Worker on " + worker.uri) { +@spark.common.html.layout(title = "Spark Worker on " + worker.uri) {
diff --git a/core/src/main/twirl/spark/storage/index.scala.html b/core/src/main/twirl/spark/storage/index.scala.html new file mode 100644 index 0000000000..fa7dad51ee --- /dev/null +++ b/core/src/main/twirl/spark/storage/index.scala.html @@ -0,0 +1,28 @@ +@(maxMem: Long, remainingMem: Long, diskSpaceUsed: Long, rdds: List[spark.storage.RDDInfo]) + +@spark.common.html.layout(title = "Storage Dashboard") { + + +
+
+
    +
  • Memory: + @{spark.Utils.memoryBytesToString(maxMem - remainingMem)} Used + (@{spark.Utils.memoryBytesToString(remainingMem)} Available)
  • +
  • Disk: @{spark.Utils.memoryBytesToString(diskSpaceUsed)} Used
  • +
+
+
+ +
+ + +
+
+

RDD Summary

+
+ @rdd_table(rdds) +
+
+ +} \ No newline at end of file diff --git a/core/src/main/twirl/spark/storage/rdd.scala.html b/core/src/main/twirl/spark/storage/rdd.scala.html new file mode 100644 index 0000000000..3a70326efe --- /dev/null +++ b/core/src/main/twirl/spark/storage/rdd.scala.html @@ -0,0 +1,65 @@ +@(rddInfo: spark.storage.RDDInfo, blocks: Map[String, spark.storage.BlockStatus]) + +@spark.common.html.layout(title = "RDD Info ") { + + +
+
+
    +
  • + Storage Level: + @(if (rddInfo.storageLevel.useDisk) "Disk" else "") + @(if (rddInfo.storageLevel.useMemory) "Memory" else "") + @(if (rddInfo.storageLevel.deserialized) "Deserialized" else "") + @(rddInfo.storageLevel.replication)x Replicated +
  • + Partitions: + @(rddInfo.numPartitions) +
  • +
  • + Memory Size: + @{spark.Utils.memoryBytesToString(rddInfo.memSize)} +
  • +
  • + Disk Size: + @{spark.Utils.memoryBytesToString(rddInfo.diskSize)} +
  • +
+
+
+ +
+ + +
+
+

RDD Summary

+
+ + + + + + + + + + + + + @blocks.map { case (k,v) => + + + + + + + } + +
Block Name Storage Level Size in Memory Size on Disk
@k @v.storageLevel @{spark.Utils.memoryBytesToString(v.memSize)} @{spark.Utils.memoryBytesToString(v.diskSize)}
+ + +
+
+ +} \ No newline at end of file diff --git a/core/src/main/twirl/spark/storage/rdd_row.scala.html b/core/src/main/twirl/spark/storage/rdd_row.scala.html new file mode 100644 index 0000000000..3dd9944e3b --- /dev/null +++ b/core/src/main/twirl/spark/storage/rdd_row.scala.html @@ -0,0 +1,18 @@ +@(rdd: spark.storage.RDDInfo) + + + + + @rdd.name + + + + @(if (rdd.storageLevel.useDisk) "Disk" else "") + @(if (rdd.storageLevel.useMemory) "Memory" else "") + @(if (rdd.storageLevel.deserialized) "Deserialized" else "") + @(rdd.storageLevel.replication)x Replicated + + @rdd.numPartitions + @{spark.Utils.memoryBytesToString(rdd.memSize)} + @{spark.Utils.memoryBytesToString(rdd.diskSize)} + \ No newline at end of file diff --git a/core/src/main/twirl/spark/storage/rdd_table.scala.html b/core/src/main/twirl/spark/storage/rdd_table.scala.html new file mode 100644 index 0000000000..24f55ccefb --- /dev/null +++ b/core/src/main/twirl/spark/storage/rdd_table.scala.html @@ -0,0 +1,18 @@ +@(rdds: List[spark.storage.RDDInfo]) + + + + + + + + + + + + + @for(rdd <- rdds) { + @rdd_row(rdd) + } + +
RDD Name Storage Level Partitions Size in Memory Size on Disk
\ No newline at end of file From eb95212f4d24dbcd734922f39d51e6fdeaeb4c8b Mon Sep 17 00:00:00 2001 From: Denny Date: Mon, 29 Oct 2012 14:57:32 -0700 Subject: [PATCH 022/291] code Formatting --- .../scala/spark/storage/BlockManagerUI.scala | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/spark/storage/BlockManagerUI.scala b/core/src/main/scala/spark/storage/BlockManagerUI.scala index c168f60c35..635c096c87 100644 --- a/core/src/main/scala/spark/storage/BlockManagerUI.scala +++ b/core/src/main/scala/spark/storage/BlockManagerUI.scala @@ -21,7 +21,8 @@ object BlockManagerUI extends Logging { try { logInfo("Starting BlockManager WebUI.") val port = Option(System.getenv("BLOCKMANAGER_UI_PORT")).getOrElse("9080").toInt - AkkaUtils.startSprayServer(actorSystem, "0.0.0.0", port, webUIDirectives.handler, "BlockManagerHTTPServer") + AkkaUtils.startSprayServer(actorSystem, "0.0.0.0", port, + webUIDirectives.handler, "BlockManagerHTTPServer") } catch { case e: Exception => logError("Failed to create BlockManager WebUI", e) @@ -32,10 +33,12 @@ object BlockManagerUI extends Logging { } private[spark] -case class RDDInfo(id: Int, name: String, storageLevel: StorageLevel, numPartitions: Int, memSize: Long, diskSize: Long) +case class RDDInfo(id: Int, name: String, storageLevel: StorageLevel, + numPartitions: Int, memSize: Long, diskSize: Long) private[spark] -class BlockManagerUIDirectives(val actorSystem: ActorSystem, master: ActorRef, sc: SparkContext) extends Directives { +class BlockManagerUIDirectives(val actorSystem: ActorSystem, master: ActorRef, + sc: SparkContext) extends Directives { val STATIC_RESOURCE_DIR = "spark/deploy/static" implicit val timeout = Timeout(1 seconds) @@ -55,7 +58,9 @@ class BlockManagerUIDirectives(val actorSystem: ActorSystem, master: ActorRef, s .reduceOption(_+_).getOrElse(0L) // Filter out everything that's not and rdd. 
- val rddBlocks = storageStati.flatMap(_.blocks).filter { case(k,v) => k.startsWith("rdd") }.toMap + val rddBlocks = storageStati.flatMap(_.blocks).filter { case(k,v) => + k.startsWith("rdd") + }.toMap val rdds = rddInfoFromBlockStati(rddBlocks) spark.storage.html.index.render(maxMem, remainingMem, diskSpaceUsed, rdds.toList) @@ -67,7 +72,9 @@ class BlockManagerUIDirectives(val actorSystem: ActorSystem, master: ActorRef, s val prefix = "rdd_" + id.toString val storageStati = status.asInstanceOf[ArrayBuffer[StorageStatus]] - val rddBlocks = storageStati.flatMap(_.blocks).filter { case(k,v) => k.startsWith(prefix) }.toMap + val rddBlocks = storageStati.flatMap(_.blocks).filter { case(k,v) => + k.startsWith(prefix) + }.toMap val rddInfo = rddInfoFromBlockStati(rddBlocks).first spark.storage.html.rdd.render(rddInfo, rddBlocks) From ceec1a1a6abb1fd03316e7fcc532d7e121d5bf65 Mon Sep 17 00:00:00 2001 From: Denny Date: Mon, 29 Oct 2012 15:03:01 -0700 Subject: [PATCH 023/291] Nicer storage level format on RDD page --- core/src/main/twirl/spark/storage/rdd.scala.html | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/core/src/main/twirl/spark/storage/rdd.scala.html b/core/src/main/twirl/spark/storage/rdd.scala.html index 3a70326efe..075289c826 100644 --- a/core/src/main/twirl/spark/storage/rdd.scala.html +++ b/core/src/main/twirl/spark/storage/rdd.scala.html @@ -50,7 +50,12 @@ @blocks.map { case (k,v) => @k - @v.storageLevel + + @(if (v.storageLevel.useDisk) "Disk" else "") + @(if (v.storageLevel.useMemory) "Memory" else "") + @(if (v.storageLevel.deserialized) "Deserialized" else "") + @(v.storageLevel.replication)x Replicated + @{spark.Utils.memoryBytesToString(v.memSize)} @{spark.Utils.memoryBytesToString(v.diskSize)} From 4a1be7e0dbf0031d85b91dc1132fe101d87ba097 Mon Sep 17 00:00:00 2001 From: Denny Date: Mon, 12 Nov 2012 10:56:35 -0800 Subject: [PATCH 024/291] Refactor BlockManager UI and adding worker details. --- core/src/main/scala/spark/RDD.scala | 7 +- core/src/main/scala/spark/SparkContext.scala | 2 - .../spark/storage/BlockManagerMaster.scala | 11 +-- .../scala/spark/storage/BlockManagerUI.scala | 51 ++++-------- .../scala/spark/storage/StorageLevel.scala | 9 +++ .../scala/spark/storage/StorageUtils.scala | 78 +++++++++++++++++++ .../main/twirl/spark/storage/index.scala.html | 22 ++++-- .../main/twirl/spark/storage/rdd.scala.html | 35 +++++---- .../twirl/spark/storage/rdd_row.scala.html | 18 ----- .../twirl/spark/storage/rdd_table.scala.html | 16 +++- .../spark/storage/worker_table.scala.html | 24 ++++++ 11 files changed, 186 insertions(+), 87 deletions(-) create mode 100644 core/src/main/scala/spark/storage/StorageUtils.scala delete mode 100644 core/src/main/twirl/spark/storage/rdd_row.scala.html create mode 100644 core/src/main/twirl/spark/storage/worker_table.scala.html diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index dc757dc6aa..3669bda2d2 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -86,6 +86,9 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial @transient val dependencies: List[Dependency[_]] // Methods available on all RDDs: + + // A friendly name for this RDD + var name: String = null /** Record user function generating this RDD. 
*/ private[spark] val origin = Utils.getSparkCallSite @@ -108,8 +111,8 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial private var storageLevel: StorageLevel = StorageLevel.NONE /* Assign a name to this RDD */ - def name(name: String) = { - sc.rddNames(this.id) = name + def setName(_name: String) = { + name = _name this } diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 71c9dcd017..7ea0f6f9e0 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -113,8 +113,6 @@ class SparkContext( // Keeps track of all persisted RDDs private[spark] val persistentRdds = new ConcurrentHashMap[Int, RDD[_]]() - // A HashMap for friendly RDD Names - private[spark] val rddNames = new ConcurrentHashMap[Int, String]() // Add each JAR given through the constructor jars.foreach { addJar(_) } diff --git a/core/src/main/scala/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/spark/storage/BlockManagerMaster.scala index 3fc9b629c1..beafdda9d1 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMaster.scala @@ -4,7 +4,7 @@ import java.io._ import java.util.{HashMap => JHashMap} import scala.collection.JavaConverters._ -import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map} +import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import scala.util.Random import akka.actor._ @@ -95,10 +95,7 @@ private[spark] case class GetStorageStatus extends ToBlockManagerMaster private[spark] -case class BlockStatus(storageLevel: StorageLevel, memSize: Long, diskSize: Long) - -private[spark] -case class StorageStatus(blockManagerId: BlockManagerId, maxMem: Long, remainingMem: Long, blocks: Map[String, BlockStatus]) +case class BlockStatus(blockManagerId: BlockManagerId, storageLevel: StorageLevel, memSize: Long, diskSize: Long) private[spark] class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { @@ -135,7 +132,7 @@ private[spark] class BlockManagerMasterActor(val isLocal: Boolean) extends Actor if (storageLevel.isValid) { // isValid means it is either stored in-memory or on-disk. - _blocks.put(blockId, BlockStatus(storageLevel, memSize, diskSize)) + _blocks.put(blockId, BlockStatus(blockManagerId, storageLevel, memSize, diskSize)) if (storageLevel.useMemory) { _remainingMem -= memSize logInfo("Added %s in memory on %s:%d (size: %s, free: %s)".format( @@ -237,7 +234,7 @@ private[spark] class BlockManagerMasterActor(val isLocal: Boolean) extends Actor private def getStorageStatus() { val res = blockManagerInfo.map { case(blockManagerId, info) => - StorageStatus(blockManagerId, info.maxMem, info.remainingMem, info.blocks.asScala) + StorageStatus(blockManagerId, info.maxMem, info.blocks.asScala.toMap) } sender ! 
res } diff --git a/core/src/main/scala/spark/storage/BlockManagerUI.scala b/core/src/main/scala/spark/storage/BlockManagerUI.scala index 635c096c87..35cbd59280 100644 --- a/core/src/main/scala/spark/storage/BlockManagerUI.scala +++ b/core/src/main/scala/spark/storage/BlockManagerUI.scala @@ -12,6 +12,7 @@ import scala.collection.mutable.ArrayBuffer import spark.{Logging, SparkContext, SparkEnv} import spark.util.AkkaUtils + private[spark] object BlockManagerUI extends Logging { @@ -32,9 +33,6 @@ object BlockManagerUI extends Logging { } -private[spark] -case class RDDInfo(id: Int, name: String, storageLevel: StorageLevel, - numPartitions: Int, memSize: Long, diskSize: Long) private[spark] class BlockManagerUIDirectives(val actorSystem: ActorSystem, master: ActorRef, @@ -49,21 +47,17 @@ class BlockManagerUIDirectives(val actorSystem: ActorSystem, master: ActorRef, // Request the current storage status from the Master val future = master ? GetStorageStatus future.map { status => - val storageStati = status.asInstanceOf[ArrayBuffer[StorageStatus]] + val storageStatusList = status.asInstanceOf[ArrayBuffer[StorageStatus]].toArray // Calculate macro-level statistics - val maxMem = storageStati.map(_.maxMem).reduce(_+_) - val remainingMem = storageStati.map(_.remainingMem).reduce(_+_) - val diskSpaceUsed = storageStati.flatMap(_.blocks.values.map(_.diskSize)) + val maxMem = storageStatusList.map(_.maxMem).reduce(_+_) + val remainingMem = storageStatusList.map(_.memRemaining).reduce(_+_) + val diskSpaceUsed = storageStatusList.flatMap(_.blocks.values.map(_.diskSize)) .reduceOption(_+_).getOrElse(0L) - // Filter out everything that's not and rdd. - val rddBlocks = storageStati.flatMap(_.blocks).filter { case(k,v) => - k.startsWith("rdd") - }.toMap - val rdds = rddInfoFromBlockStati(rddBlocks) + val rdds = StorageUtils.rddInfoFromStorageStatus(storageStatusList, sc) - spark.storage.html.index.render(maxMem, remainingMem, diskSpaceUsed, rdds.toList) + spark.storage.html.index.render(maxMem, remainingMem, diskSpaceUsed, rdds, storageStatusList) } }}} ~ get { path("rdd") { parameter("id") { id => { completeWith { @@ -71,13 +65,13 @@ class BlockManagerUIDirectives(val actorSystem: ActorSystem, master: ActorRef, future.map { status => val prefix = "rdd_" + id.toString - val storageStati = status.asInstanceOf[ArrayBuffer[StorageStatus]] - val rddBlocks = storageStati.flatMap(_.blocks).filter { case(k,v) => - k.startsWith(prefix) - }.toMap - val rddInfo = rddInfoFromBlockStati(rddBlocks).first - spark.storage.html.rdd.render(rddInfo, rddBlocks) + val storageStatusList = status.asInstanceOf[ArrayBuffer[StorageStatus]].toArray + val filteredStorageStatusList = StorageUtils.filterStorageStatusByPrefix(storageStatusList, prefix) + + val rddInfo = StorageUtils.rddInfoFromStorageStatus(filteredStorageStatusList, sc).first + + spark.storage.html.rdd.render(rddInfo, filteredStorageStatusList) } }}}}} ~ @@ -87,23 +81,6 @@ class BlockManagerUIDirectives(val actorSystem: ActorSystem, master: ActorRef, } - private def rddInfoFromBlockStati(infos: Map[String, BlockStatus]) : Array[RDDInfo] = { - infos.groupBy { case(k,v) => - // Group by rdd name, ignore the partition name - k.substring(0,k.lastIndexOf('_')) - }.map { case(k,v) => - val blockStati = v.map(_._2).toArray - // Add up memory and disk sizes - val tmp = blockStati.map { x => (x.memSize, x.diskSize)}.reduce { (x,y) => - (x._1 + y._1, x._2 + y._2) - } - // Get the friendly name for the rdd, if available. - // This is pretty hacky, is there a better way? 
- val rddId = k.split("_").last.toInt - val rddName : String = Option(sc.rddNames.get(rddId)).getOrElse(k) - val rddStorageLevel = sc.persistentRdds.get(rddId).getStorageLevel - RDDInfo(rddId, rddName, rddStorageLevel, blockStati.length, tmp._1, tmp._2) - }.toArray - } + } diff --git a/core/src/main/scala/spark/storage/StorageLevel.scala b/core/src/main/scala/spark/storage/StorageLevel.scala index c497f03e0c..97d8c7566d 100644 --- a/core/src/main/scala/spark/storage/StorageLevel.scala +++ b/core/src/main/scala/spark/storage/StorageLevel.scala @@ -68,6 +68,15 @@ class StorageLevel( override def toString: String = "StorageLevel(%b, %b, %b, %d)".format(useDisk, useMemory, deserialized, replication) + + def description : String = { + var result = "" + result += (if (useDisk) "Disk " else "") + result += (if (useMemory) "Memory " else "") + result += (if (deserialized) "Deserialized " else "Serialized") + result += "%sx Replicated".format(replication) + result + } } object StorageLevel { diff --git a/core/src/main/scala/spark/storage/StorageUtils.scala b/core/src/main/scala/spark/storage/StorageUtils.scala new file mode 100644 index 0000000000..ebc7390ee5 --- /dev/null +++ b/core/src/main/scala/spark/storage/StorageUtils.scala @@ -0,0 +1,78 @@ +package spark.storage + +import spark.SparkContext + +private[spark] +case class StorageStatus(blockManagerId: BlockManagerId, maxMem: Long, + blocks: Map[String, BlockStatus]) { + + def memUsed(blockPrefix: String = "") = { + blocks.filterKeys(_.startsWith(blockPrefix)).values.map(_.memSize). + reduceOption(_+_).getOrElse(0l) + } + + def diskUsed(blockPrefix: String = "") = { + blocks.filterKeys(_.startsWith(blockPrefix)).values.map(_.diskSize). + reduceOption(_+_).getOrElse(0l) + } + + def memRemaining : Long = maxMem - memUsed() + +} + +case class RDDInfo(id: Int, name: String, storageLevel: StorageLevel, + numPartitions: Int, memSize: Long, diskSize: Long, locations: Array[BlockManagerId]) + + +/* Helper methods for storage-related objects */ +private[spark] +object StorageUtils { + + /* Given the current storage status of the BlockManager, returns information for each RDD */ + def rddInfoFromStorageStatus(storageStatusList: Array[StorageStatus], + sc: SparkContext) : Array[RDDInfo] = { + rddInfoFromBlockStatusList(storageStatusList.flatMap(_.blocks).toMap, sc) + } + + /* Given a list of BlockStatus objets, returns information for each RDD */ + def rddInfoFromBlockStatusList(infos: Map[String, BlockStatus], + sc: SparkContext) : Array[RDDInfo] = { + // Find all RDD Blocks (ignore broadcast variables) + val rddBlocks = infos.filterKeys(_.startsWith("rdd")) + + // Group by rddId, ignore the partition name + val groupedRddBlocks = infos.groupBy { case(k, v) => + k.substring(0,k.lastIndexOf('_')) + }.mapValues(_.values.toArray) + + // For each RDD, generate an RDDInfo object + groupedRddBlocks.map { case(rddKey, rddBlocks) => + + // Add up memory and disk sizes + val memSize = rddBlocks.map(_.memSize).reduce(_ + _) + val diskSize = rddBlocks.map(_.diskSize).reduce(_ + _) + + // Find the id of the RDD, e.g. rdd_1 => 1 + val rddId = rddKey.split("_").last.toInt + // Get the friendly name for the rdd, if available. 
+ val rddName = Option(sc.persistentRdds.get(rddId).name).getOrElse(rddKey) + val rddStorageLevel = sc.persistentRdds.get(rddId).getStorageLevel + + RDDInfo(rddId, rddName, rddStorageLevel, rddBlocks.length, memSize, diskSize, + rddBlocks.map(_.blockManagerId)) + }.toArray + } + + /* Removes all BlockStatus object that are not part of a block prefix */ + def filterStorageStatusByPrefix(storageStatusList: Array[StorageStatus], + prefix: String) : Array[StorageStatus] = { + + storageStatusList.map { status => + val newBlocks = status.blocks.filterKeys(_.startsWith(prefix)) + //val newRemainingMem = status.maxMem - newBlocks.values.map(_.memSize).reduce(_ + _) + StorageStatus(status.blockManagerId, status.maxMem, newBlocks) + } + + } + +} \ No newline at end of file diff --git a/core/src/main/twirl/spark/storage/index.scala.html b/core/src/main/twirl/spark/storage/index.scala.html index fa7dad51ee..2b337f6133 100644 --- a/core/src/main/twirl/spark/storage/index.scala.html +++ b/core/src/main/twirl/spark/storage/index.scala.html @@ -1,4 +1,5 @@ -@(maxMem: Long, remainingMem: Long, diskSpaceUsed: Long, rdds: List[spark.storage.RDDInfo]) +@(maxMem: Long, remainingMem: Long, diskSpaceUsed: Long, rdds: Array[spark.storage.RDDInfo], storageStatusList: Array[spark.storage.StorageStatus]) +@import spark.Utils @spark.common.html.layout(title = "Storage Dashboard") { @@ -7,16 +8,16 @@
  • Memory: - @{spark.Utils.memoryBytesToString(maxMem - remainingMem)} Used - (@{spark.Utils.memoryBytesToString(remainingMem)} Available)
  • -
  • Disk: @{spark.Utils.memoryBytesToString(diskSpaceUsed)} Used
  • + @{Utils.memoryBytesToString(maxMem - remainingMem)} Used + (@{Utils.memoryBytesToString(remainingMem)} Available) +
  • Disk: @{Utils.memoryBytesToString(diskSpaceUsed)} Used

- +

RDD Summary

@@ -25,4 +26,15 @@
+
+ + +
+
+

Worker Summary

+
+ @worker_table(storageStatusList) +
+
+ } \ No newline at end of file diff --git a/core/src/main/twirl/spark/storage/rdd.scala.html b/core/src/main/twirl/spark/storage/rdd.scala.html index 075289c826..ac7f8c981f 100644 --- a/core/src/main/twirl/spark/storage/rdd.scala.html +++ b/core/src/main/twirl/spark/storage/rdd.scala.html @@ -1,4 +1,5 @@ -@(rddInfo: spark.storage.RDDInfo, blocks: Map[String, spark.storage.BlockStatus]) +@(rddInfo: spark.storage.RDDInfo, storageStatusList: Array[spark.storage.StorageStatus]) +@import spark.Utils @spark.common.html.layout(title = "RDD Info ") { @@ -8,21 +9,18 @@
  • Storage Level: - @(if (rddInfo.storageLevel.useDisk) "Disk" else "") - @(if (rddInfo.storageLevel.useMemory) "Memory" else "") - @(if (rddInfo.storageLevel.deserialized) "Deserialized" else "") - @(rddInfo.storageLevel.replication)x Replicated + @(rddInfo.storageLevel.description)
  • Partitions: @(rddInfo.numPartitions)
  • Memory Size: - @{spark.Utils.memoryBytesToString(rddInfo.memSize)} + @{Utils.memoryBytesToString(rddInfo.memSize)}
  • Disk Size: - @{spark.Utils.memoryBytesToString(rddInfo.diskSize)} + @{Utils.memoryBytesToString(rddInfo.diskSize)}
@@ -36,6 +34,7 @@

RDD Summary


+ @@ -47,17 +46,14 @@ - @blocks.map { case (k,v) => + @storageStatusList.flatMap(_.blocks).toArray.sortWith(_._1 < _._1).map { case (k,v) => - - + + } @@ -67,4 +63,15 @@ +
+ + +
+
+

Worker Summary

+
+ @worker_table(storageStatusList, "rdd_" + rddInfo.id ) +
+
+ } \ No newline at end of file diff --git a/core/src/main/twirl/spark/storage/rdd_row.scala.html b/core/src/main/twirl/spark/storage/rdd_row.scala.html deleted file mode 100644 index 3dd9944e3b..0000000000 --- a/core/src/main/twirl/spark/storage/rdd_row.scala.html +++ /dev/null @@ -1,18 +0,0 @@ -@(rdd: spark.storage.RDDInfo) - - - - - - - - \ No newline at end of file diff --git a/core/src/main/twirl/spark/storage/rdd_table.scala.html b/core/src/main/twirl/spark/storage/rdd_table.scala.html index 24f55ccefb..af801cf229 100644 --- a/core/src/main/twirl/spark/storage/rdd_table.scala.html +++ b/core/src/main/twirl/spark/storage/rdd_table.scala.html @@ -1,4 +1,5 @@ -@(rdds: List[spark.storage.RDDInfo]) +@(rdds: Array[spark.storage.RDDInfo]) +@import spark.Utils
@k - @(if (v.storageLevel.useDisk) "Disk" else "") - @(if (v.storageLevel.useMemory) "Memory" else "") - @(if (v.storageLevel.deserialized) "Deserialized" else "") - @(v.storageLevel.replication)x Replicated + @(v.storageLevel.description) @{spark.Utils.memoryBytesToString(v.memSize)}@{spark.Utils.memoryBytesToString(v.diskSize)}@{Utils.memoryBytesToString(v.memSize)}@{Utils.memoryBytesToString(v.diskSize)}
- - @rdd.name - - - @(if (rdd.storageLevel.useDisk) "Disk" else "") - @(if (rdd.storageLevel.useMemory) "Memory" else "") - @(if (rdd.storageLevel.deserialized) "Deserialized" else "") - @(rdd.storageLevel.replication)x Replicated - @rdd.numPartitions@{spark.Utils.memoryBytesToString(rdd.memSize)}@{spark.Utils.memoryBytesToString(rdd.diskSize)}
@@ -12,7 +13,18 @@ @for(rdd <- rdds) { - @rdd_row(rdd) + + + + + + + }
+ + @rdd.name + + @(rdd.storageLevel.description) + @rdd.numPartitions@{Utils.memoryBytesToString(rdd.memSize)}@{Utils.memoryBytesToString(rdd.diskSize)}
\ No newline at end of file diff --git a/core/src/main/twirl/spark/storage/worker_table.scala.html b/core/src/main/twirl/spark/storage/worker_table.scala.html new file mode 100644 index 0000000000..d54b8de4cc --- /dev/null +++ b/core/src/main/twirl/spark/storage/worker_table.scala.html @@ -0,0 +1,24 @@ +@(workersStatusList: Array[spark.storage.StorageStatus], prefix: String = "") +@import spark.Utils + + + + + + + + + + + @for(status <- workersStatusList) { + + + + + + } + +
 Host    Memory Usage    Disk Usage
@(status.blockManagerId.ip + ":" + status.blockManagerId.port) + @(Utils.memoryBytesToString(status.memUsed(prefix))) + (@(Utils.memoryBytesToString(status.memRemaining)) Total Available) + @(Utils.memoryBytesToString(status.diskUsed(prefix)))
\ No newline at end of file From ccd075cf960df6c6c449b709515cdd81499a52be Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 24 Dec 2012 15:01:13 -0800 Subject: [PATCH 025/291] Reduce object overhead in Pyspark shuffle and collect --- pyspark/pyspark/rdd.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 85a24c6854..708ea6eb55 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -145,8 +145,10 @@ class RDD(object): self.map(f).collect() # Force evaluation def collect(self): - pickle = self.ctx.arrayAsPickle(self._jrdd.rdd().collect()) - return load_pickle(bytes(pickle)) + def asList(iterator): + yield list(iterator) + pickles = self.mapPartitions(asList)._jrdd.rdd().collect() + return list(chain.from_iterable(load_pickle(bytes(p)) for p in pickles)) def reduce(self, f): """ @@ -319,16 +321,23 @@ class RDD(object): if numSplits is None: numSplits = self.ctx.defaultParallelism def add_shuffle_key(iterator): + buckets = defaultdict(list) for (k, v) in iterator: - yield str(hashFunc(k)) - yield dump_pickle((k, v)) + buckets[hashFunc(k) % numSplits].append((k, v)) + for (split, items) in buckets.iteritems(): + yield str(split) + yield dump_pickle(items) keyed = PipelinedRDD(self, add_shuffle_key) keyed._bypass_serializer = True pairRDD = self.ctx.jvm.PairwiseRDD(keyed._jrdd.rdd()).asJavaPairRDD() partitioner = self.ctx.jvm.spark.api.python.PythonPartitioner(numSplits) + # Transferring O(n) objects to Java is too expensive. Instead, we'll + # form the hash buckets in Python, transferring O(numSplits) objects + # to Java. Each object is a (splitNumber, [objects]) pair. jrdd = pairRDD.partitionBy(partitioner) jrdd = jrdd.map(self.ctx.jvm.ExtractValue()) - return RDD(jrdd, self.ctx) + # Flatten the resulting RDD: + return RDD(jrdd, self.ctx).flatMap(lambda items: items) def combineByKey(self, createCombiner, mergeValue, mergeCombiners, numSplits=None): From 4608902fb87af64a15b97ab21fe6382cd6e5a644 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 24 Dec 2012 17:20:10 -0800 Subject: [PATCH 026/291] Use filesystem to collect RDDs in PySpark. Passing large volumes of data through Py4J seems to be slow. It appears to be faster to write the data to the local filesystem and read it back from Python. 
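The handoff sketched below mirrors what this patch sets up at the serialization layer: objects go into a local temp file as a 32-bit length header followed by a protocol-2 pickle, and the other side streams the frames back. This is an illustrative, self-contained sketch using plain struct/cPickle; it is not the actual pyspark.serializers code, just the same framing idea.

    import cPickle
    import os
    import struct
    from tempfile import NamedTemporaryFile

    def write_with_length(obj, stream):
        # 32-bit big-endian length header, then the protocol-2 pickle bytes
        data = cPickle.dumps(obj, 2)
        stream.write(struct.pack("!i", len(data)))
        stream.write(data)

    def read_pickle_frames(stream):
        # Yield objects until a length header can no longer be read (EOF)
        while True:
            header = stream.read(4)
            if len(header) < 4:
                return
            (length,) = struct.unpack("!i", header)
            yield cPickle.loads(stream.read(length))

    # Writer side: dump everything to a temp file in one pass...
    tempFile = NamedTemporaryFile(delete=False)
    for x in [1, "two", (3, 4)]:
        write_with_length(x, tempFile)
    tempFile.close()

    # ...reader side: stream it back without per-object Py4J round trips.
    with open(tempFile.name, "rb") as f:
        print list(read_pickle_frames(f))   # [1, 'two', (3, 4)]
    os.unlink(tempFile.name)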
--- .../scala/spark/api/python/PythonRDD.scala | 66 +++++++------------ pyspark/pyspark/context.py | 9 ++- pyspark/pyspark/rdd.py | 34 ++++++++-- pyspark/pyspark/serializers.py | 8 +++ pyspark/pyspark/worker.py | 12 +--- 5 files changed, 66 insertions(+), 63 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 50094d6b0f..4f870e837a 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -1,6 +1,7 @@ package spark.api.python import java.io._ +import java.util.{List => JList} import scala.collection.Map import scala.collection.JavaConversions._ @@ -59,36 +60,7 @@ trait PythonRDDBase { } out.flush() for (elem <- parent.iterator(split)) { - if (elem.isInstanceOf[Array[Byte]]) { - val arr = elem.asInstanceOf[Array[Byte]] - dOut.writeInt(arr.length) - dOut.write(arr) - } else if (elem.isInstanceOf[scala.Tuple2[_, _]]) { - val t = elem.asInstanceOf[scala.Tuple2[_, _]] - val t1 = t._1.asInstanceOf[Array[Byte]] - val t2 = t._2.asInstanceOf[Array[Byte]] - val length = t1.length + t2.length - 3 - 3 + 4 // stripPickle() removes 3 bytes - dOut.writeInt(length) - dOut.writeByte(Pickle.PROTO) - dOut.writeByte(Pickle.TWO) - dOut.write(PythonRDD.stripPickle(t1)) - dOut.write(PythonRDD.stripPickle(t2)) - dOut.writeByte(Pickle.TUPLE2) - dOut.writeByte(Pickle.STOP) - } else if (elem.isInstanceOf[String]) { - // For uniformity, strings are wrapped into Pickles. - val s = elem.asInstanceOf[String].getBytes("UTF-8") - val length = 2 + 1 + 4 + s.length + 1 - dOut.writeInt(length) - dOut.writeByte(Pickle.PROTO) - dOut.writeByte(Pickle.TWO) - dOut.writeByte(Pickle.BINUNICODE) - dOut.writeInt(Integer.reverseBytes(s.length)) - dOut.write(s) - dOut.writeByte(Pickle.STOP) - } else { - throw new Exception("Unexpected RDD type") - } + PythonRDD.writeAsPickle(elem, dOut) } dOut.flush() out.flush() @@ -174,36 +146,45 @@ object PythonRDD { arr.slice(2, arr.length - 1) } - def asPickle(elem: Any) : Array[Byte] = { - val baos = new ByteArrayOutputStream(); - val dOut = new DataOutputStream(baos); + /** + * Write strings, pickled Python objects, or pairs of pickled objects to a data output stream. + * The data format is a 32-bit integer representing the pickled object's length (in bytes), + * followed by the pickled data. + * @param elem the object to write + * @param dOut a data output stream + */ + def writeAsPickle(elem: Any, dOut: DataOutputStream) { if (elem.isInstanceOf[Array[Byte]]) { - elem.asInstanceOf[Array[Byte]] + val arr = elem.asInstanceOf[Array[Byte]] + dOut.writeInt(arr.length) + dOut.write(arr) } else if (elem.isInstanceOf[scala.Tuple2[Array[Byte], Array[Byte]]]) { val t = elem.asInstanceOf[scala.Tuple2[Array[Byte], Array[Byte]]] + val length = t._1.length + t._2.length - 3 - 3 + 4 // stripPickle() removes 3 bytes + dOut.writeInt(length) dOut.writeByte(Pickle.PROTO) dOut.writeByte(Pickle.TWO) dOut.write(PythonRDD.stripPickle(t._1)) dOut.write(PythonRDD.stripPickle(t._2)) dOut.writeByte(Pickle.TUPLE2) dOut.writeByte(Pickle.STOP) - baos.toByteArray() } else if (elem.isInstanceOf[String]) { // For uniformity, strings are wrapped into Pickles. 
val s = elem.asInstanceOf[String].getBytes("UTF-8") + val length = 2 + 1 + 4 + s.length + 1 + dOut.writeInt(length) dOut.writeByte(Pickle.PROTO) dOut.writeByte(Pickle.TWO) dOut.write(Pickle.BINUNICODE) dOut.writeInt(Integer.reverseBytes(s.length)) dOut.write(s) dOut.writeByte(Pickle.STOP) - baos.toByteArray() } else { throw new Exception("Unexpected RDD type") } } - def pickleFile(sc: JavaSparkContext, filename: String, parallelism: Int) : + def readRDDFromPickleFile(sc: JavaSparkContext, filename: String, parallelism: Int) : JavaRDD[Array[Byte]] = { val file = new DataInputStream(new FileInputStream(filename)) val objs = new collection.mutable.ArrayBuffer[Array[Byte]] @@ -221,11 +202,12 @@ object PythonRDD { JavaRDD.fromRDD(sc.sc.parallelize(objs, parallelism)) } - def arrayAsPickle(arr : Any) : Array[Byte] = { - val pickles : Array[Byte] = arr.asInstanceOf[Array[Any]].map(asPickle).map(stripPickle).flatten - - Array[Byte](Pickle.PROTO, Pickle.TWO, Pickle.EMPTY_LIST, Pickle.MARK) ++ pickles ++ - Array[Byte] (Pickle.APPENDS, Pickle.STOP) + def writeArrayToPickleFile[T](items: Array[T], filename: String) { + val file = new DataOutputStream(new FileOutputStream(filename)) + for (item <- items) { + writeAsPickle(item, file) + } + file.close() } } diff --git a/pyspark/pyspark/context.py b/pyspark/pyspark/context.py index 50d57e5317..19f9f9e133 100644 --- a/pyspark/pyspark/context.py +++ b/pyspark/pyspark/context.py @@ -14,9 +14,8 @@ class SparkContext(object): gateway = launch_gateway() jvm = gateway.jvm - pickleFile = jvm.spark.api.python.PythonRDD.pickleFile - asPickle = jvm.spark.api.python.PythonRDD.asPickle - arrayAsPickle = jvm.spark.api.python.PythonRDD.arrayAsPickle + readRDDFromPickleFile = jvm.PythonRDD.readRDDFromPickleFile + writeArrayToPickleFile = jvm.PythonRDD.writeArrayToPickleFile def __init__(self, master, name, defaultParallelism=None): self.master = master @@ -45,11 +44,11 @@ class SparkContext(object): # because it sends O(n) Py4J commands. As an alternative, serialized # objects are written to a file and loaded through textFile(). tempFile = NamedTemporaryFile(delete=False) + atexit.register(lambda: os.unlink(tempFile.name)) for x in c: write_with_length(dump_pickle(x), tempFile) tempFile.close() - atexit.register(lambda: os.unlink(tempFile.name)) - jrdd = self.pickleFile(self._jsc, tempFile.name, numSlices) + jrdd = self.readRDDFromPickleFile(self._jsc, tempFile.name, numSlices) return RDD(jrdd, self) def textFile(self, name, minSplits=None): diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 708ea6eb55..01908cff96 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -1,13 +1,15 @@ +import atexit from base64 import standard_b64encode as b64enc from collections import defaultdict from itertools import chain, ifilter, imap import os import shlex from subprocess import Popen, PIPE +from tempfile import NamedTemporaryFile from threading import Thread from pyspark import cloudpickle -from pyspark.serializers import dump_pickle, load_pickle +from pyspark.serializers import dump_pickle, load_pickle, read_from_pickle_file from pyspark.join import python_join, python_left_outer_join, \ python_right_outer_join, python_cogroup @@ -145,10 +147,30 @@ class RDD(object): self.map(f).collect() # Force evaluation def collect(self): + # To minimize the number of transfers between Python and Java, we'll + # flatten each partition into a list before collecting it. Due to + # pipelining, this should add minimal overhead. 
def asList(iterator): yield list(iterator) - pickles = self.mapPartitions(asList)._jrdd.rdd().collect() - return list(chain.from_iterable(load_pickle(bytes(p)) for p in pickles)) + picklesInJava = self.mapPartitions(asList)._jrdd.rdd().collect() + return list(chain.from_iterable(self._collect_array_through_file(picklesInJava))) + + def _collect_array_through_file(self, array): + # Transferring lots of data through Py4J can be slow because + # socket.readline() is inefficient. Instead, we'll dump the data to a + # file and read it back. + tempFile = NamedTemporaryFile(delete=False) + tempFile.close() + def clean_up_file(): + try: os.unlink(tempFile.name) + except: pass + atexit.register(clean_up_file) + self.ctx.writeArrayToPickleFile(array, tempFile.name) + # Read the data into Python and deserialize it: + with open(tempFile.name, 'rb') as tempFile: + for item in read_from_pickle_file(tempFile): + yield item + os.unlink(tempFile.name) def reduce(self, f): """ @@ -220,15 +242,15 @@ class RDD(object): >>> sc.parallelize([2, 3, 4]).take(2) [2, 3] """ - pickle = self.ctx.arrayAsPickle(self._jrdd.rdd().take(num)) - return load_pickle(bytes(pickle)) + picklesInJava = self._jrdd.rdd().take(num) + return list(self._collect_array_through_file(picklesInJava)) def first(self): """ >>> sc.parallelize([2, 3, 4]).first() 2 """ - return load_pickle(bytes(self.ctx.asPickle(self._jrdd.first()))) + return self.take(1)[0] def saveAsTextFile(self, path): def func(iterator): diff --git a/pyspark/pyspark/serializers.py b/pyspark/pyspark/serializers.py index 21ef8b106c..bfcdda8f12 100644 --- a/pyspark/pyspark/serializers.py +++ b/pyspark/pyspark/serializers.py @@ -33,3 +33,11 @@ def read_with_length(stream): if obj == "": raise EOFError return obj + + +def read_from_pickle_file(stream): + try: + while True: + yield load_pickle(read_with_length(stream)) + except EOFError: + return diff --git a/pyspark/pyspark/worker.py b/pyspark/pyspark/worker.py index 62824a1c9b..9f6b507dbd 100644 --- a/pyspark/pyspark/worker.py +++ b/pyspark/pyspark/worker.py @@ -8,7 +8,7 @@ from base64 import standard_b64decode from pyspark.broadcast import Broadcast, _broadcastRegistry from pyspark.cloudpickle import CloudPickler from pyspark.serializers import write_with_length, read_with_length, \ - read_long, read_int, dump_pickle, load_pickle + read_long, read_int, dump_pickle, load_pickle, read_from_pickle_file # Redirect stdout to stderr so that users must return values from functions. @@ -20,14 +20,6 @@ def load_obj(): return load_pickle(standard_b64decode(sys.stdin.readline().strip())) -def read_input(): - try: - while True: - yield load_pickle(read_with_length(sys.stdin)) - except EOFError: - return - - def main(): num_broadcast_variables = read_int(sys.stdin) for _ in range(num_broadcast_variables): @@ -40,7 +32,7 @@ def main(): dumps = lambda x: x else: dumps = dump_pickle - for obj in func(read_input()): + for obj in func(read_from_pickle_file(sys.stdin)): write_with_length(dumps(obj), old_stdout) From e2dad15621f5dc15275b300df05483afde5025a0 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Wed, 26 Dec 2012 17:34:24 -0800 Subject: [PATCH 027/291] Add support for batched serialization of Python objects in PySpark. 
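The idea, sketched below, is to pickle fixed-size groups of elements rather than individual ones, so each partition crosses the Java boundary as a handful of large byte strings instead of one pickle per element. This is a rough standalone sketch of the grouping helper; the real version lands in pyspark/serializers.py in the diff that follows and also treats a batch size of -1 as "one batch per partition".

    class Batch(object):
        """Marks a group of elements that were pickled together."""
        def __init__(self, items):
            self.items = items

    def batched(iterator, batchSize):
        items = []
        for item in iterator:
            items.append(item)
            if len(items) == batchSize:
                yield Batch(items)
                items = []
        if items:
            yield Batch(items)   # final, possibly short, batch

    # 1000 elements with batchSize=100 become just 10 pickled payloads:
    print len(list(batched(xrange(1000), 100)))   # 10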
--- pyspark/pyspark/context.py | 3 +- pyspark/pyspark/rdd.py | 57 +++++++++++++++++++++++----------- pyspark/pyspark/serializers.py | 34 +++++++++++++++++++- 3 files changed, 74 insertions(+), 20 deletions(-) diff --git a/pyspark/pyspark/context.py b/pyspark/pyspark/context.py index 19f9f9e133..032619693a 100644 --- a/pyspark/pyspark/context.py +++ b/pyspark/pyspark/context.py @@ -17,13 +17,14 @@ class SparkContext(object): readRDDFromPickleFile = jvm.PythonRDD.readRDDFromPickleFile writeArrayToPickleFile = jvm.PythonRDD.writeArrayToPickleFile - def __init__(self, master, name, defaultParallelism=None): + def __init__(self, master, name, defaultParallelism=None, batchSize=-1): self.master = master self.name = name self._jsc = self.jvm.JavaSparkContext(master, name) self.defaultParallelism = \ defaultParallelism or self._jsc.sc().defaultParallelism() self.pythonExec = os.environ.get("PYSPARK_PYTHON_EXEC", 'python') + self.batchSize = batchSize # -1 represents a unlimited batch size # Broadcast's __reduce__ method stores Broadcast instances here. # This allows other code to determine which Broadcast instances have # been pickled, so it can determine which Java broadcast objects to diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 01908cff96..d7081dffd2 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -2,6 +2,7 @@ import atexit from base64 import standard_b64encode as b64enc from collections import defaultdict from itertools import chain, ifilter, imap +import operator import os import shlex from subprocess import Popen, PIPE @@ -9,7 +10,8 @@ from tempfile import NamedTemporaryFile from threading import Thread from pyspark import cloudpickle -from pyspark.serializers import dump_pickle, load_pickle, read_from_pickle_file +from pyspark.serializers import batched, Batch, dump_pickle, load_pickle, \ + read_from_pickle_file from pyspark.join import python_join, python_left_outer_join, \ python_right_outer_join, python_cogroup @@ -83,6 +85,11 @@ class RDD(object): >>> rdd = sc.parallelize([1, 1, 2, 3]) >>> rdd.union(rdd).collect() [1, 1, 2, 3, 1, 1, 2, 3] + + # Union of batched and unbatched RDDs: + >>> batchedRDD = sc.parallelize([Batch([1, 2, 3, 4, 5])]) + >>> rdd.union(batchedRDD).collect() + [1, 1, 2, 3, 1, 2, 3, 4, 5] """ return RDD(self._jrdd.union(other._jrdd), self.ctx) @@ -147,13 +154,8 @@ class RDD(object): self.map(f).collect() # Force evaluation def collect(self): - # To minimize the number of transfers between Python and Java, we'll - # flatten each partition into a list before collecting it. Due to - # pipelining, this should add minimal overhead. 
- def asList(iterator): - yield list(iterator) - picklesInJava = self.mapPartitions(asList)._jrdd.rdd().collect() - return list(chain.from_iterable(self._collect_array_through_file(picklesInJava))) + picklesInJava = self._jrdd.rdd().collect() + return list(self._collect_array_through_file(picklesInJava)) def _collect_array_through_file(self, array): # Transferring lots of data through Py4J can be slow because @@ -214,12 +216,21 @@ class RDD(object): # TODO: aggregate + def sum(self): + """ + >>> sc.parallelize([1.0, 2.0, 3.0]).sum() + 6.0 + """ + return self.mapPartitions(lambda x: [sum(x)]).reduce(operator.add) + def count(self): """ >>> sc.parallelize([2, 3, 4]).count() - 3L + 3 + >>> sc.parallelize([Batch([2, 3, 4])]).count() + 3 """ - return self._jrdd.count() + return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum() def countByValue(self): """ @@ -342,24 +353,23 @@ class RDD(object): """ if numSplits is None: numSplits = self.ctx.defaultParallelism + # Transferring O(n) objects to Java is too expensive. Instead, we'll + # form the hash buckets in Python, transferring O(numSplits) objects + # to Java. Each object is a (splitNumber, [objects]) pair. def add_shuffle_key(iterator): buckets = defaultdict(list) for (k, v) in iterator: buckets[hashFunc(k) % numSplits].append((k, v)) for (split, items) in buckets.iteritems(): yield str(split) - yield dump_pickle(items) + yield dump_pickle(Batch(items)) keyed = PipelinedRDD(self, add_shuffle_key) keyed._bypass_serializer = True pairRDD = self.ctx.jvm.PairwiseRDD(keyed._jrdd.rdd()).asJavaPairRDD() partitioner = self.ctx.jvm.spark.api.python.PythonPartitioner(numSplits) - # Transferring O(n) objects to Java is too expensive. Instead, we'll - # form the hash buckets in Python, transferring O(numSplits) objects - # to Java. Each object is a (splitNumber, [objects]) pair. jrdd = pairRDD.partitionBy(partitioner) jrdd = jrdd.map(self.ctx.jvm.ExtractValue()) - # Flatten the resulting RDD: - return RDD(jrdd, self.ctx).flatMap(lambda items: items) + return RDD(jrdd, self.ctx) def combineByKey(self, createCombiner, mergeValue, mergeCombiners, numSplits=None): @@ -478,8 +488,19 @@ class PipelinedRDD(RDD): def _jrdd(self): if self._jrdd_val: return self._jrdd_val - funcs = [self.func, self._bypass_serializer] - pipe_command = ' '.join(b64enc(cloudpickle.dumps(f)) for f in funcs) + func = self.func + if not self._bypass_serializer and self.ctx.batchSize != 1: + oldfunc = self.func + batchSize = self.ctx.batchSize + if batchSize == -1: # unlimited batch size + def batched_func(iterator): + yield Batch(list(oldfunc(iterator))) + else: + def batched_func(iterator): + return batched(oldfunc(iterator), batchSize) + func = batched_func + cmds = [func, self._bypass_serializer] + pipe_command = ' '.join(b64enc(cloudpickle.dumps(f)) for f in cmds) broadcast_vars = ListConverter().convert( [x._jbroadcast for x in self.ctx._pickled_broadcast_vars], self.ctx.gateway._gateway_client) diff --git a/pyspark/pyspark/serializers.py b/pyspark/pyspark/serializers.py index bfcdda8f12..4ed925697c 100644 --- a/pyspark/pyspark/serializers.py +++ b/pyspark/pyspark/serializers.py @@ -2,6 +2,33 @@ import struct import cPickle +class Batch(object): + """ + Used to store multiple RDD entries as a single Java object. + + This relieves us from having to explicitly track whether an RDD + is stored as batches of objects and avoids problems when processing + the union() of batched and unbatched RDDs (e.g. the union() of textFile() + with another RDD). 
+ """ + def __init__(self, items): + self.items = items + + +def batched(iterator, batchSize): + items = [] + count = 0 + for item in iterator: + items.append(item) + count += 1 + if count == batchSize: + yield Batch(items) + items = [] + count = [] + if items: + yield Batch(items) + + def dump_pickle(obj): return cPickle.dumps(obj, 2) @@ -38,6 +65,11 @@ def read_with_length(stream): def read_from_pickle_file(stream): try: while True: - yield load_pickle(read_with_length(stream)) + obj = load_pickle(read_with_length(stream)) + if type(obj) == Batch: # We don't care about inheritance + for item in obj.items: + yield item + else: + yield obj except EOFError: return From 1dca0c51804b9c94709ec9cc0544b8dfb7afe59f Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Wed, 26 Dec 2012 18:23:06 -0800 Subject: [PATCH 028/291] Remove debug output from PythonPartitioner. --- core/src/main/scala/spark/api/python/PythonPartitioner.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonPartitioner.scala b/core/src/main/scala/spark/api/python/PythonPartitioner.scala index ef9f808fb2..606a80d1eb 100644 --- a/core/src/main/scala/spark/api/python/PythonPartitioner.scala +++ b/core/src/main/scala/spark/api/python/PythonPartitioner.scala @@ -16,8 +16,6 @@ class PythonPartitioner(override val numPartitions: Int) extends Partitioner { else { val hashCode = { if (key.isInstanceOf[Array[Byte]]) { - System.err.println("Dumping a byte array!" + Arrays.hashCode(key.asInstanceOf[Array[Byte]]) - ) Arrays.hashCode(key.asInstanceOf[Array[Byte]]) } else From 2d98fff0651af4d527f41ba50c01f453fa049464 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 27 Dec 2012 10:13:29 -0800 Subject: [PATCH 029/291] Add IPython support to pyspark-shell. Suggested by / based on code from @MLnick --- pyspark/README | 3 +++ pyspark/pyspark/shell.py | 25 +++++++++++++++++-------- pyspark/requirements.txt | 1 + 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/pyspark/README b/pyspark/README index 55490e1a83..461176de7d 100644 --- a/pyspark/README +++ b/pyspark/README @@ -38,6 +38,9 @@ interacting with Java processes. It can be installed from https://github.com/bartdag/py4j; make sure to install a version that contains at least the commits through b7924aabe9. +PySpark requires the `argparse` module, which is included in Python 2.7 +and is is available for Python 2.6 through `pip` or `easy_install`. + PySpark uses the `PYTHONPATH` environment variable to search for Python classes; Py4J should be on this path, along with any libraries used by PySpark programs. `PYTHONPATH` will be automatically shipped to worker diff --git a/pyspark/pyspark/shell.py b/pyspark/pyspark/shell.py index 7ef30894cb..7012884abc 100644 --- a/pyspark/pyspark/shell.py +++ b/pyspark/pyspark/shell.py @@ -1,21 +1,30 @@ """ An interactive shell. """ +import argparse # argparse is avaiable for Python < 2.7 through easy_install. import code import sys from pyspark.context import SparkContext -def main(master='local'): +def main(master='local', ipython=False): sc = SparkContext(master, 'PySparkShell') - print "Spark context available as sc." - code.interact(local={'sc': sc}) + user_ns = {'sc' : sc} + banner = "Spark context avaiable as sc." 
+ if ipython: + import IPython + IPython.embed(user_ns=user_ns, banner2=banner) + else: + print banner + code.interact(local=user_ns) if __name__ == '__main__': - if len(sys.argv) > 1: - master = sys.argv[1] - else: - master = 'local' - main(master) + parser = argparse.ArgumentParser() + parser.add_argument("master", help="Spark master host (default='local')", + nargs='?', type=str, default="local") + parser.add_argument("-i", "--ipython", help="Run IPython shell", + action="store_true") + args = parser.parse_args() + main(args.master, args.ipython) diff --git a/pyspark/requirements.txt b/pyspark/requirements.txt index 48fa2ab105..2464ca0074 100644 --- a/pyspark/requirements.txt +++ b/pyspark/requirements.txt @@ -4,3 +4,4 @@ # install Py4J from git once https://github.com/pypa/pip/pull/526 is merged. # git+git://github.com/bartdag/py4j.git@b7924aabe9c5e63f0a4d8bbd17019534c7ec014e +argparse From 85b8f2c64f0fc4be5645d8736629fc082cb3587b Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 27 Dec 2012 17:55:33 -0800 Subject: [PATCH 030/291] Add epydoc API documentation for PySpark. --- docs/README.md | 8 +- docs/_layouts/global.html | 10 +- docs/_plugins/copy_api_dirs.rb | 17 +++ pyspark/epydoc.conf | 19 ++++ pyspark/pyspark/context.py | 24 ++++ pyspark/pyspark/rdd.py | 195 ++++++++++++++++++++++++++++++--- 6 files changed, 254 insertions(+), 19 deletions(-) create mode 100644 pyspark/epydoc.conf diff --git a/docs/README.md b/docs/README.md index 092153070e..887f407f18 100644 --- a/docs/README.md +++ b/docs/README.md @@ -25,10 +25,12 @@ To mark a block of code in your markdown to be syntax highlighted by jekyll duri // supported languages too. {% endhighlight %} -## Scaladoc +## API Docs (Scaladoc and Epydoc) You can build just the Spark scaladoc by running `sbt/sbt doc` from the SPARK_PROJECT_ROOT directory. -When you run `jekyll` in the docs directory, it will also copy over the scala doc for the various Spark subprojects into the docs directory (and then also into the _site directory). We use a jekyll plugin to run `sbt/sbt doc` before building the site so if you haven't run it (recently) it may take some time as it generates all of the scaladoc. +Similarly, you can build just the PySpark epydoc by running `epydoc --config epydoc.conf` from the SPARK_PROJECT_ROOT/pyspark directory. -NOTE: To skip the step of building and copying over the scaladoc when you build the docs, run `SKIP_SCALADOC=1 jekyll`. +When you run `jekyll` in the docs directory, it will also copy over the scaladoc for the various Spark subprojects into the docs directory (and then also into the _site directory). We use a jekyll plugin to run `sbt/sbt doc` before building the site so if you haven't run it (recently) it may take some time as it generates all of the scaladoc. The jekyll plugin also generates the PySpark docs using [epydoc](http://epydoc.sourceforge.net/). + +NOTE: To skip the step of building and copying over the scaladoc when you build the docs, run `SKIP_SCALADOC=1 jekyll`. Similarly, `SKIP_EPYDOC=1 jekyll` will skip PySpark API doc generation. diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index 41ad5242c9..43a5fa3e1c 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -49,8 +49,14 @@
  • Java
  • - -
  • API (Scaladoc)
  • + +
  • Quick Start
  • Scala
  • Java
  • +
  • Python
  • diff --git a/docs/api.md b/docs/api.md index 43548b223c..b9c93ac5e8 100644 --- a/docs/api.md +++ b/docs/api.md @@ -8,3 +8,4 @@ Here you can find links to the Scaladoc generated for the Spark sbt subprojects. - [Core](api/core/index.html) - [Examples](api/examples/index.html) - [Bagel](api/bagel/index.html) +- [PySpark](api/pyspark/index.html) diff --git a/docs/index.md b/docs/index.md index ed9953a590..33ab58a962 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,11 +7,11 @@ title: Spark Overview TODO(andyk): Rewrite to make the Java API a first class part of the story. {% endcomment %} -Spark is a MapReduce-like cluster computing framework designed for low-latency iterative jobs and interactive use from an -interpreter. It provides clean, language-integrated APIs in Scala and Java, with a rich array of parallel operators. Spark can -run on top of the [Apache Mesos](http://incubator.apache.org/mesos/) cluster manager, +Spark is a MapReduce-like cluster computing framework designed for low-latency iterative jobs and interactive use from an interpreter. +It provides clean, language-integrated APIs in Scala, Java, and Python, with a rich array of parallel operators. +Spark can run on top of the [Apache Mesos](http://incubator.apache.org/mesos/) cluster manager, [Hadoop YARN](http://hadoop.apache.org/docs/r2.0.1-alpha/hadoop-yarn/hadoop-yarn-site/YARN.html), -Amazon EC2, or without an independent resource manager ("standalone mode"). +Amazon EC2, or without an independent resource manager ("standalone mode"). # Downloading @@ -59,6 +59,7 @@ of `project/SparkBuild.scala`, then rebuilding Spark (`sbt/sbt clean compile`). * [Quick Start](quick-start.html): a quick introduction to the Spark API; start here! * [Spark Programming Guide](scala-programming-guide.html): an overview of Spark concepts, and details on the Scala API * [Java Programming Guide](java-programming-guide.html): using Spark from Java +* [Python Programming Guide](python-programming-guide.html): using Spark from Python **Deployment guides:** @@ -72,7 +73,7 @@ of `project/SparkBuild.scala`, then rebuilding Spark (`sbt/sbt clean compile`). * [Configuration](configuration.html): customize Spark via its configuration system * [Tuning Guide](tuning.html): best practices to optimize performance and memory use -* [API Docs (Scaladoc)](api/core/index.html) +* API Docs: [Java/Scala (Scaladoc)](api/core/index.html) and [Python (Epydoc)](api/pyspark/index.html) * [Bagel](bagel-programming-guide.html): an implementation of Google's Pregel on Spark * [Contributing to Spark](contributing-to-spark.html) diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md new file mode 100644 index 0000000000..b7c747f905 --- /dev/null +++ b/docs/python-programming-guide.md @@ -0,0 +1,74 @@ +--- +layout: global +title: Python Programming Guide +--- + + +The Spark Python API (PySpark) exposes most of the Spark features available in the Scala version to Python. +To learn the basics of Spark, we recommend reading through the +[Scala programming guide](scala-programming-guide.html) first; it should be +easy to follow even if you don't know Scala. +This guide will show how to use the Spark features described there in Python. + +# Key Differences in the Python API + +There are a few key differences between the Python and Scala APIs: + +* Python is dynamically typed, so RDDs can hold objects of different types. 
+* PySpark does not currently support the following Spark features: + - Accumulators + - Special functions on RRDs of doubles, such as `mean` and `stdev` + - Approximate jobs / functions, such as `countApprox` and `sumApprox`. + - `lookup` + - `mapPartitionsWithSplit` + - `persist` at storage levels other than `MEMORY_ONLY` + - `sample` + - `sort` + + +# Installing and Configuring PySpark + +PySpark requires Python 2.6 or higher. +PySpark jobs are executed using a standard cPython interpreter in order to support Python modules that use C extensions. +We have not tested PySpark with Python 3 or with alternative Python interpreters, such as [PyPy](http://pypy.org/) or [Jython](http://www.jython.org/). +By default, PySpark's scripts will run programs using `python`; an alternate Python executable may be specified by setting the `PYSPARK_PYTHON` environment variable in `conf/spark-env.sh`. + +All of PySpark's library dependencies, including [Py4J](http://py4j.sourceforge.net/), are bundled with PySpark and automatically imported. + +Standalone PySpark jobs should be run using the `run-pyspark` script, which automatically configures the Java and Python environmnt using the settings in `conf/spark-env.sh`. +The script automatically adds the `pyspark` package to the `PYTHONPATH`. + + +# Interactive Use + +PySpark's `pyspark-shell` script provides a simple way to learn the API: + +{% highlight python %} +>>> words = sc.textFile("/usr/share/dict/words") +>>> words.filter(lambda w: w.startswith("spar")).take(5) +[u'spar', u'sparable', u'sparada', u'sparadrap', u'sparagrass'] +{% endhighlight %} + +# Standalone Use + +PySpark can also be used from standalone Python scripts by creating a SparkContext in the script and running the script using the `run-pyspark` script in the `pyspark` directory. +The Quick Start guide includes a [complete example](quick-start.html#a-standalone-job-in-python) of a standalone Python job. + +Code dependencies can be deployed by listing them in the `pyFiles` option in the SparkContext constructor: + +{% highlight python %} +from pyspark import SparkContext +sc = SparkContext("local", "Job Name", pyFiles=['MyFile.py', 'lib.zip', 'app.egg']) +{% endhighlight %} + +Files listed here will be added to the `PYTHONPATH` and shipped to remote worker machines. +Code dependencies can be added to an existing SparkContext using its `addPyFile()` method. + +# Where to Go from Here + +PySpark includes several sample programs using the Python API in `pyspark/examples`. +You can run them by passing the files to the `pyspark-run` script included in PySpark -- for example `./pyspark-run examples/wordcount.py`. +Each example program prints usage help when run without any arguments. + +We currently provide [API documentation](api/pyspark/index.html) for the Python API as Epydoc. +Many of the RDD method descriptions contain [doctests](http://docs.python.org/2/library/doctest.html) that provide additional usage examples. diff --git a/docs/quick-start.md b/docs/quick-start.md index defdb34836..c859c31b09 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -6,7 +6,8 @@ title: Quick Start * This will become a table of contents (this text will be scraped). {:toc} -This tutorial provides a quick introduction to using Spark. We will first introduce the API through Spark's interactive Scala shell (don't worry if you don't know Scala -- you will need much for this), then show how to write standalone jobs in Scala and Java. 
See the [programming guide](scala-programming-guide.html) for a fuller reference. +This tutorial provides a quick introduction to using Spark. We will first introduce the API through Spark's interactive Scala shell (don't worry if you don't know Scala -- you will need much for this), then show how to write standalone jobs in Scala, Java, and Python. +See the [programming guide](scala-programming-guide.html) for a more complete reference. To follow along with this guide, you only need to have successfully built Spark on one machine. Simply go into your Spark directory and run: @@ -230,3 +231,40 @@ Lines with a: 8422, Lines with b: 1836 {% endhighlight %} This example only runs the job locally; for a tutorial on running jobs across several machines, see the [Standalone Mode](spark-standalone.html) documentation, and consider using a distributed input source, such as HDFS. + +# A Standalone Job In Python +Now we will show how to write a standalone job using the Python API (PySpark). + +As an example, we'll create a simple Spark job, `SimpleJob.py`: + +{% highlight python %} +"""SimpleJob.py""" +from pyspark import SparkContext + +logFile = "/var/log/syslog" # Should be some file on your system +sc = SparkContext("local", "Simple job") +logData = sc.textFile(logFile).cache() + +numAs = logData.filter(lambda s: 'a' in s).count() +numBs = logData.filter(lambda s: 'b' in s).count() + +print "Lines with a: %i, lines with b: %i" % (numAs, numBs) +{% endhighlight %} + + +This job simply counts the number of lines containing 'a' and the number containing 'b' in a system log file. +Like in the Scala and Java examples, we use a SparkContext to create RDDs. +We can pass Python functions to Spark, which are automatically serialized along with any variables that they reference. +For jobs that use custom classes or third-party libraries, we can add those code dependencies to SparkContext to ensure that they will be available on remote machines; this is described in more detail in the [Python programming guide](python-programming-guide). +`SimpleJob` is simple enough that we do not need to specify any code dependencies. + +We can run this job using the `run-pyspark` script in `$SPARK_HOME/pyspark`: + +{% highlight python %} +$ cd $SPARK_HOME +$ ./pyspark/run-pyspark SimpleJob.py +... +Lines with a: 8422, Lines with b: 1836 +{% endhighlight python %} + +This example only runs the job locally; for a tutorial on running jobs across several machines, see the [Standalone Mode](spark-standalone.html) documentation, and consider using a distributed input source, such as HDFS. diff --git a/pyspark/README b/pyspark/README deleted file mode 100644 index d8d521c72c..0000000000 --- a/pyspark/README +++ /dev/null @@ -1,42 +0,0 @@ -# PySpark - -PySpark is a Python API for Spark. - -PySpark jobs are writen in Python and executed using a standard Python -interpreter; this supports modules that use Python C extensions. The -API is based on the Spark Scala API and uses regular Python functions -and lambdas to support user-defined functions. PySpark supports -interactive use through a standard Python interpreter; it can -automatically serialize closures and ship them to worker processes. - -PySpark is built on top of the Spark Java API. Data is uniformly -represented as serialized Python objects and stored in Spark Java -processes, which communicate with PySpark worker processes over pipes. - -## Features - -PySpark supports most of the Spark API, including broadcast variables. 
-RDDs are dynamically typed and can hold any Python object. - -PySpark does not support: - -- Special functions on RDDs of doubles -- Accumulators - -## Examples and Documentation - -The PySpark source contains docstrings and doctests that document its -API. The public classes are in `context.py` and `rdd.py`. - -The `pyspark/pyspark/examples` directory contains a few complete -examples. - -## Installing PySpark -# -To use PySpark, `SPARK_HOME` should be set to the location of the Spark -package. - -## Running PySpark - -The easiest way to run PySpark is to use the `run-pyspark` and -`pyspark-shell` scripts, which are included in the `pyspark` directory. diff --git a/pyspark/pyspark/examples/kmeans.py b/pyspark/examples/kmeans.py similarity index 100% rename from pyspark/pyspark/examples/kmeans.py rename to pyspark/examples/kmeans.py diff --git a/pyspark/pyspark/examples/pi.py b/pyspark/examples/pi.py similarity index 100% rename from pyspark/pyspark/examples/pi.py rename to pyspark/examples/pi.py diff --git a/pyspark/pyspark/examples/tc.py b/pyspark/examples/tc.py similarity index 100% rename from pyspark/pyspark/examples/tc.py rename to pyspark/examples/tc.py diff --git a/pyspark/pyspark/examples/wordcount.py b/pyspark/examples/wordcount.py similarity index 100% rename from pyspark/pyspark/examples/wordcount.py rename to pyspark/examples/wordcount.py diff --git a/pyspark/pyspark/__init__.py b/pyspark/pyspark/__init__.py index 549c2d2711..8f8402b62b 100644 --- a/pyspark/pyspark/__init__.py +++ b/pyspark/pyspark/__init__.py @@ -1,3 +1,9 @@ import sys import os sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "pyspark/lib/py4j0.7.egg")) + + +from pyspark.context import SparkContext + + +__all__ = ["SparkContext"] diff --git a/pyspark/pyspark/examples/__init__.py b/pyspark/pyspark/examples/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 From 6ee1ff2663cf1f776dd33e448548a8ddcf974dc6 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 29 Dec 2012 22:22:56 +0000 Subject: [PATCH 036/291] Fix bug in pyspark.serializers.batch; add .gitignore. 
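For context on the one-character fix below: the previous patch reset the batch counter to a list (count = []) instead of zero, so batching anything longer than a single batch failed with a TypeError as soon as count += 1 ran again. A minimal repro, independent of Spark:

    count = 0
    count += 1        # fine: counting with an int
    count = []        # the buggy reset
    try:
        count += 1    # list += int raises TypeError
    except TypeError as e:
        print "broken batch counter:", e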
--- pyspark/.gitignore | 2 ++ pyspark/pyspark/rdd.py | 4 +++- pyspark/pyspark/serializers.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 pyspark/.gitignore diff --git a/pyspark/.gitignore b/pyspark/.gitignore new file mode 100644 index 0000000000..5c56e638f9 --- /dev/null +++ b/pyspark/.gitignore @@ -0,0 +1,2 @@ +*.pyc +docs/ diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 111476d274..20f84b2dd0 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -695,7 +695,9 @@ def _test(): import doctest from pyspark.context import SparkContext globs = globals().copy() - globs['sc'] = SparkContext('local[4]', 'PythonTest') + # The small batch size here ensures that we see multiple batches, + # even in these small test examples: + globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) doctest.testmod(globs=globs) globs['sc'].stop() diff --git a/pyspark/pyspark/serializers.py b/pyspark/pyspark/serializers.py index 4ed925697c..8b08f7ef0f 100644 --- a/pyspark/pyspark/serializers.py +++ b/pyspark/pyspark/serializers.py @@ -24,7 +24,7 @@ def batched(iterator, batchSize): if count == batchSize: yield Batch(items) items = [] - count = [] + count = 0 if items: yield Batch(items) From 26186e2d259f3aa2db9c8594097fd342107ce147 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 29 Dec 2012 15:34:57 -0800 Subject: [PATCH 037/291] Use batching in pyspark parallelize(); fix cartesian() --- pyspark/pyspark/context.py | 4 +++- pyspark/pyspark/rdd.py | 31 +++++++++++++++---------------- pyspark/pyspark/serializers.py | 23 +++++++++++++---------- 3 files changed, 31 insertions(+), 27 deletions(-) diff --git a/pyspark/pyspark/context.py b/pyspark/pyspark/context.py index b90596ecc2..6172d69dcf 100644 --- a/pyspark/pyspark/context.py +++ b/pyspark/pyspark/context.py @@ -4,7 +4,7 @@ from tempfile import NamedTemporaryFile from pyspark.broadcast import Broadcast from pyspark.java_gateway import launch_gateway -from pyspark.serializers import dump_pickle, write_with_length +from pyspark.serializers import dump_pickle, write_with_length, batched from pyspark.rdd import RDD from py4j.java_collections import ListConverter @@ -91,6 +91,8 @@ class SparkContext(object): # objects are written to a file and loaded through textFile(). 
tempFile = NamedTemporaryFile(delete=False) atexit.register(lambda: os.unlink(tempFile.name)) + if self.batchSize != 1: + c = batched(c, self.batchSize) for x in c: write_with_length(dump_pickle(x), tempFile) tempFile.close() diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 20f84b2dd0..203f7377d2 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -2,7 +2,7 @@ import atexit from base64 import standard_b64encode as b64enc import copy from collections import defaultdict -from itertools import chain, ifilter, imap +from itertools import chain, ifilter, imap, product import operator import os import shlex @@ -123,12 +123,6 @@ class RDD(object): >>> rdd = sc.parallelize([1, 1, 2, 3]) >>> rdd.union(rdd).collect() [1, 1, 2, 3, 1, 1, 2, 3] - - Union of batched and unbatched RDDs (internal test): - - >>> batchedRDD = sc.parallelize([Batch([1, 2, 3, 4, 5])]) - >>> rdd.union(batchedRDD).collect() - [1, 1, 2, 3, 1, 2, 3, 4, 5] """ return RDD(self._jrdd.union(other._jrdd), self.ctx) @@ -168,7 +162,18 @@ class RDD(object): >>> sorted(rdd.cartesian(rdd).collect()) [(1, 1), (1, 2), (2, 1), (2, 2)] """ - return RDD(self._jrdd.cartesian(other._jrdd), self.ctx) + # Due to batching, we can't use the Java cartesian method. + java_cartesian = RDD(self._jrdd.cartesian(other._jrdd), self.ctx) + def unpack_batches(pair): + (x, y) = pair + if type(x) == Batch or type(y) == Batch: + xs = x.items if type(x) == Batch else [x] + ys = y.items if type(y) == Batch else [y] + for pair in product(xs, ys): + yield pair + else: + yield pair + return java_cartesian.flatMap(unpack_batches) def groupBy(self, f, numSplits=None): """ @@ -293,8 +298,6 @@ class RDD(object): >>> sc.parallelize([2, 3, 4]).count() 3 - >>> sc.parallelize([Batch([2, 3, 4])]).count() - 3 """ return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum() @@ -667,12 +670,8 @@ class PipelinedRDD(RDD): if not self._bypass_serializer and self.ctx.batchSize != 1: oldfunc = self.func batchSize = self.ctx.batchSize - if batchSize == -1: # unlimited batch size - def batched_func(iterator): - yield Batch(list(oldfunc(iterator))) - else: - def batched_func(iterator): - return batched(oldfunc(iterator), batchSize) + def batched_func(iterator): + return batched(oldfunc(iterator), batchSize) func = batched_func cmds = [func, self._bypass_serializer] pipe_command = ' '.join(b64enc(cloudpickle.dumps(f)) for f in cmds) diff --git a/pyspark/pyspark/serializers.py b/pyspark/pyspark/serializers.py index 8b08f7ef0f..9a5151ea00 100644 --- a/pyspark/pyspark/serializers.py +++ b/pyspark/pyspark/serializers.py @@ -16,17 +16,20 @@ class Batch(object): def batched(iterator, batchSize): - items = [] - count = 0 - for item in iterator: - items.append(item) - count += 1 - if count == batchSize: + if batchSize == -1: # unlimited batch size + yield Batch(list(iterator)) + else: + items = [] + count = 0 + for item in iterator: + items.append(item) + count += 1 + if count == batchSize: + yield Batch(items) + items = [] + count = 0 + if items: yield Batch(items) - items = [] - count = 0 - if items: - yield Batch(items) def dump_pickle(obj): From 59195c68ec37acf20d527189ed757397b273a207 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 29 Dec 2012 16:01:03 -0800 Subject: [PATCH 038/291] Update PySpark for compatibility with TaskContext. 
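On the Python side the only fallout is in take(), which iterates partitions on the driver and now has to pass the JVM a placeholder TaskContext. Stripped of the Py4J plumbing, the loop it wraps is the usual "consume whole splits until enough items are collected" pattern; the sketch below is pure Python for illustration, not the actual RDD code.

    def take_from_partitions(partitions, num):
        # Consume whole splits, in order, until at least `num` items are in hand.
        items = []
        remaining = list(partitions)
        while len(items) < num and remaining:
            items.extend(remaining.pop(0))
        return items[:num]

    print take_from_partitions([[1, 2], [3, 4], [5]], 3)   # [1, 2, 3]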
--- .../src/main/scala/spark/api/python/PythonRDD.scala | 13 +++++-------- pyspark/pyspark/rdd.py | 3 ++- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index f76616a4c4..dc48378fdc 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -8,10 +8,7 @@ import scala.io.Source import spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} import spark.broadcast.Broadcast -import spark.SparkEnv -import spark.Split -import spark.RDD -import spark.OneToOneDependency +import spark._ import spark.rdd.PipedRDD @@ -34,7 +31,7 @@ private[spark] class PythonRDD[T: ClassManifest]( override val partitioner = if (preservePartitoning) parent.partitioner else None - override def compute(split: Split): Iterator[Array[Byte]] = { + override def compute(split: Split, context: TaskContext): Iterator[Array[Byte]] = { val SPARK_HOME = new ProcessBuilder().environment().get("SPARK_HOME") val pb = new ProcessBuilder(Seq(pythonExec, SPARK_HOME + "/pyspark/pyspark/worker.py")) @@ -74,7 +71,7 @@ private[spark] class PythonRDD[T: ClassManifest]( out.println(elem) } out.flush() - for (elem <- parent.iterator(split)) { + for (elem <- parent.iterator(split, context)) { PythonRDD.writeAsPickle(elem, dOut) } dOut.flush() @@ -123,8 +120,8 @@ private class PairwiseRDD(prev: RDD[Array[Byte]]) extends RDD[(Array[Byte], Array[Byte])](prev.context) { override def splits = prev.splits override val dependencies = List(new OneToOneDependency(prev)) - override def compute(split: Split) = - prev.iterator(split).grouped(2).map { + override def compute(split: Split, context: TaskContext) = + prev.iterator(split, context).grouped(2).map { case Seq(a, b) => (a, b) case x => throw new Exception("PairwiseRDD: unexpected value: " + x) } diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 203f7377d2..21dda31c4e 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -335,9 +335,10 @@ class RDD(object): """ items = [] splits = self._jrdd.splits() + taskContext = self.ctx.jvm.spark.TaskContext(0, 0, 0) while len(items) < num and splits: split = splits.pop(0) - iterator = self._jrdd.iterator(split) + iterator = self._jrdd.iterator(split, taskContext) items.extend(self._collect_iterator_through_file(iterator)) return items[:num] From 39dd953fd88e9aa7335603ab452d9c1bed4ba67a Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 29 Dec 2012 17:06:50 -0800 Subject: [PATCH 039/291] Add test for pyspark.RDD.saveAsTextFile(). --- pyspark/pyspark/rdd.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py index 21dda31c4e..cbffb6cc1f 100644 --- a/pyspark/pyspark/rdd.py +++ b/pyspark/pyspark/rdd.py @@ -351,10 +351,17 @@ class RDD(object): """ return self.take(1)[0] - # TODO: add test and fix for use with Batch def saveAsTextFile(self, path): """ Save this RDD as a text file, using string representations of elements. 
+ + >>> tempFile = NamedTemporaryFile(delete=True) + >>> tempFile.close() + >>> sc.parallelize(range(10)).saveAsTextFile(tempFile.name) + >>> from fileinput import input + >>> from glob import glob + >>> ''.join(input(glob(tempFile.name + "/part-0000*"))) + '0\\n1\\n2\\n3\\n4\\n5\\n6\\n7\\n8\\n9\\n' """ def func(iterator): return (str(x).encode("utf-8") for x in iterator) From 099898b43955d99351ec94d4a373de854bf7edf7 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 29 Dec 2012 17:52:47 -0800 Subject: [PATCH 040/291] Port LR example to PySpark using numpy. This version of the example crashes after the first iteration with "OverflowError: math range error" because Python's math.exp() behaves differently than Scala's; see SPARK-646. --- pyspark/examples/lr.py | 57 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100755 pyspark/examples/lr.py diff --git a/pyspark/examples/lr.py b/pyspark/examples/lr.py new file mode 100755 index 0000000000..5fca0266b8 --- /dev/null +++ b/pyspark/examples/lr.py @@ -0,0 +1,57 @@ +""" +This example requires numpy (http://www.numpy.org/) +""" +from collections import namedtuple +from math import exp +from os.path import realpath +import sys + +import numpy as np +from pyspark.context import SparkContext + + +N = 100000 # Number of data points +D = 10 # Number of dimensions +R = 0.7 # Scaling factor +ITERATIONS = 5 +np.random.seed(42) + + +DataPoint = namedtuple("DataPoint", ['x', 'y']) +from lr import DataPoint # So that DataPoint is properly serialized + + +def generateData(): + def generatePoint(i): + y = -1 if i % 2 == 0 else 1 + x = np.random.normal(size=D) + (y * R) + return DataPoint(x, y) + return [generatePoint(i) for i in range(N)] + + +if __name__ == "__main__": + if len(sys.argv) == 1: + print >> sys.stderr, \ + "Usage: PythonLR []" + exit(-1) + sc = SparkContext(sys.argv[1], "PythonLR", pyFiles=[realpath(__file__)]) + slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2 + points = sc.parallelize(generateData(), slices).cache() + + # Initialize w to a random value + w = 2 * np.random.ranf(size=D) - 1 + print "Initial w: " + str(w) + + def add(x, y): + x += y + return x + + for i in range(1, ITERATIONS + 1): + print "On iteration %i" % i + + gradient = points.map(lambda p: + (1.0 / (1.0 + exp(-p.y * np.dot(w, p.x)))) * p.y * p.x + ).reduce(add) + w -= gradient + + print "Final w: " + str(w) From 6f6a6b79c4c3f3555f8ff427c91e714d02afe8fa Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 31 Dec 2012 14:56:23 -0800 Subject: [PATCH 041/291] Launch with `scala` by default in run-pyspark --- pyspark/run-pyspark | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyspark/run-pyspark b/pyspark/run-pyspark index f8039b8038..4d10fbea8b 100755 --- a/pyspark/run-pyspark +++ b/pyspark/run-pyspark @@ -20,4 +20,9 @@ export PYSPARK_PYTHON # Add the PySpark classes to the Python path: export PYTHONPATH=$SPARK_HOME/pyspark/:$PYTHONPATH +# Launch with `scala` by default: +if [[ "$SPARK_LAUNCH_WITH_SCALA" != "0" ]] ; then + export SPARK_LAUNCH_WITH_SCALA=1 +fi + exec "$PYSPARK_PYTHON" "$@" From 170e451fbdd308ae77065bd9c0f2bd278abf0cb7 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Tue, 1 Jan 2013 13:52:14 -0800 Subject: [PATCH 042/291] Minor documentation and style fixes for PySpark. 
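Among the cleanups below are the hand-rolled Pickle opcode constants used by writeAsPickle. As an illustrative sanity check (not part of the patch), the byte sequence that method emits for a string, after its outer length header, is an ordinary protocol-2 pickle that cPickle reads back directly:

    import cPickle
    import struct

    # PROTO 2, BINUNICODE, little-endian 4-byte length, UTF-8 bytes, STOP
    s = u"spark".encode("utf-8")
    payload = "\x80\x02" + "X" + struct.pack("<i", len(s)) + s + "."
    print repr(cPickle.loads(payload))   # u'spark'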
--- .../spark/api/python/PythonPartitioner.scala | 4 +- .../scala/spark/api/python/PythonRDD.scala | 43 +++++++++++++------ docs/index.md | 8 +++- docs/python-programming-guide.md | 3 +- pyspark/examples/kmeans.py | 13 +++--- .../{lr.py => logistic_regression.py} | 4 +- pyspark/examples/pi.py | 5 ++- .../examples/{tc.py => transitive_closure.py} | 5 ++- pyspark/examples/wordcount.py | 4 +- pyspark/pyspark/__init__.py | 13 +++++- 10 files changed, 70 insertions(+), 32 deletions(-) rename pyspark/examples/{lr.py => logistic_regression.py} (93%) rename pyspark/examples/{tc.py => transitive_closure.py} (94%) diff --git a/core/src/main/scala/spark/api/python/PythonPartitioner.scala b/core/src/main/scala/spark/api/python/PythonPartitioner.scala index 2c829508e5..648d9402b0 100644 --- a/core/src/main/scala/spark/api/python/PythonPartitioner.scala +++ b/core/src/main/scala/spark/api/python/PythonPartitioner.scala @@ -17,9 +17,9 @@ private[spark] class PythonPartitioner(override val numPartitions: Int) extends val hashCode = { if (key.isInstanceOf[Array[Byte]]) { Arrays.hashCode(key.asInstanceOf[Array[Byte]]) - } - else + } else { key.hashCode() + } } val mod = hashCode % numPartitions if (mod < 0) { diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index dc48378fdc..19a039e330 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -13,8 +13,12 @@ import spark.rdd.PipedRDD private[spark] class PythonRDD[T: ClassManifest]( - parent: RDD[T], command: Seq[String], envVars: java.util.Map[String, String], - preservePartitoning: Boolean, pythonExec: String, broadcastVars: java.util.List[Broadcast[Array[Byte]]]) + parent: RDD[T], + command: Seq[String], + envVars: java.util.Map[String, String], + preservePartitoning: Boolean, + pythonExec: String, + broadcastVars: java.util.List[Broadcast[Array[Byte]]]) extends RDD[Array[Byte]](parent.context) { // Similar to Runtime.exec(), if we are given a single string, split it into words @@ -38,8 +42,8 @@ private[spark] class PythonRDD[T: ClassManifest]( // Add the environmental variables to the process. val currentEnvVars = pb.environment() - envVars.foreach { - case (variable, value) => currentEnvVars.put(variable, value) + for ((variable, value) <- envVars) { + currentEnvVars.put(variable, value) } val proc = pb.start() @@ -116,6 +120,10 @@ private[spark] class PythonRDD[T: ClassManifest]( val asJavaRDD : JavaRDD[Array[Byte]] = JavaRDD.fromRDD(this) } +/** + * Form an RDD[(Array[Byte], Array[Byte])] from key-value pairs returned from Python. + * This is used by PySpark's shuffle operations. + */ private class PairwiseRDD(prev: RDD[Array[Byte]]) extends RDD[(Array[Byte], Array[Byte])](prev.context) { override def splits = prev.splits @@ -139,6 +147,16 @@ private[spark] object PythonRDD { * Write strings, pickled Python objects, or pairs of pickled objects to a data output stream. * The data format is a 32-bit integer representing the pickled object's length (in bytes), * followed by the pickled data. 
+ * + * Pickle module: + * + * http://docs.python.org/2/library/pickle.html + * + * The pickle protocol is documented in the source of the `pickle` and `pickletools` modules: + * + * http://hg.python.org/cpython/file/2.6/Lib/pickle.py + * http://hg.python.org/cpython/file/2.6/Lib/pickletools.py + * * @param elem the object to write * @param dOut a data output stream */ @@ -201,15 +219,14 @@ private[spark] object PythonRDD { } private object Pickle { - def b(x: Int): Byte = x.asInstanceOf[Byte] - val PROTO: Byte = b(0x80) - val TWO: Byte = b(0x02) - val BINUNICODE : Byte = 'X' - val STOP : Byte = '.' - val TUPLE2 : Byte = b(0x86) - val EMPTY_LIST : Byte = ']' - val MARK : Byte = '(' - val APPENDS : Byte = 'e' + val PROTO: Byte = 0x80.toByte + val TWO: Byte = 0x02.toByte + val BINUNICODE: Byte = 'X' + val STOP: Byte = '.' + val TUPLE2: Byte = 0x86.toByte + val EMPTY_LIST: Byte = ']' + val MARK: Byte = '(' + val APPENDS: Byte = 'e' } private class ExtractValue extends spark.api.java.function.Function[(Array[Byte], diff --git a/docs/index.md b/docs/index.md index 33ab58a962..848b585333 100644 --- a/docs/index.md +++ b/docs/index.md @@ -8,7 +8,7 @@ TODO(andyk): Rewrite to make the Java API a first class part of the story. {% endcomment %} Spark is a MapReduce-like cluster computing framework designed for low-latency iterative jobs and interactive use from an interpreter. -It provides clean, language-integrated APIs in Scala, Java, and Python, with a rich array of parallel operators. +It provides clean, language-integrated APIs in [Scala](scala-programming-guide.html), [Java](java-programming-guide.html), and [Python](python-programming-guide.html), with a rich array of parallel operators. Spark can run on top of the [Apache Mesos](http://incubator.apache.org/mesos/) cluster manager, [Hadoop YARN](http://hadoop.apache.org/docs/r2.0.1-alpha/hadoop-yarn/hadoop-yarn-site/YARN.html), Amazon EC2, or without an independent resource manager ("standalone mode"). @@ -61,6 +61,11 @@ of `project/SparkBuild.scala`, then rebuilding Spark (`sbt/sbt clean compile`). * [Java Programming Guide](java-programming-guide.html): using Spark from Java * [Python Programming Guide](python-programming-guide.html): using Spark from Python +**API Docs:** + +* [Java/Scala (Scaladoc)](api/core/index.html) +* [Python (Epydoc)](api/pyspark/index.html) + **Deployment guides:** * [Running Spark on Amazon EC2](ec2-scripts.html): scripts that let you launch a cluster on EC2 in about 5 minutes @@ -73,7 +78,6 @@ of `project/SparkBuild.scala`, then rebuilding Spark (`sbt/sbt clean compile`). * [Configuration](configuration.html): customize Spark via its configuration system * [Tuning Guide](tuning.html): best practices to optimize performance and memory use -* API Docs: [Java/Scala (Scaladoc)](api/core/index.html) and [Python (Epydoc)](api/pyspark/index.html) * [Bagel](bagel-programming-guide.html): an implementation of Google's Pregel on Spark * [Contributing to Spark](contributing-to-spark.html) diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md index b7c747f905..d88d4eb42d 100644 --- a/docs/python-programming-guide.md +++ b/docs/python-programming-guide.md @@ -17,8 +17,7 @@ There are a few key differences between the Python and Scala APIs: * Python is dynamically typed, so RDDs can hold objects of different types. 
* PySpark does not currently support the following Spark features: - Accumulators - - Special functions on RRDs of doubles, such as `mean` and `stdev` - - Approximate jobs / functions, such as `countApprox` and `sumApprox`. + - Special functions on RDDs of doubles, such as `mean` and `stdev` - `lookup` - `mapPartitionsWithSplit` - `persist` at storage levels other than `MEMORY_ONLY` diff --git a/pyspark/examples/kmeans.py b/pyspark/examples/kmeans.py index 9cc366f03c..ad2be21178 100644 --- a/pyspark/examples/kmeans.py +++ b/pyspark/examples/kmeans.py @@ -1,18 +1,21 @@ +""" +This example requires numpy (http://www.numpy.org/) +""" import sys -from pyspark.context import SparkContext -from numpy import array, sum as np_sum +import numpy as np +from pyspark import SparkContext def parseVector(line): - return array([float(x) for x in line.split(' ')]) + return np.array([float(x) for x in line.split(' ')]) def closestPoint(p, centers): bestIndex = 0 closest = float("+inf") for i in range(len(centers)): - tempDist = np_sum((p - centers[i]) ** 2) + tempDist = np.sum((p - centers[i]) ** 2) if tempDist < closest: closest = tempDist bestIndex = i @@ -41,7 +44,7 @@ if __name__ == "__main__": newPoints = pointStats.map( lambda (x, (y, z)): (x, y / z)).collect() - tempDist = sum(np_sum((kPoints[x] - y) ** 2) for (x, y) in newPoints) + tempDist = sum(np.sum((kPoints[x] - y) ** 2) for (x, y) in newPoints) for (x, y) in newPoints: kPoints[x] = y diff --git a/pyspark/examples/lr.py b/pyspark/examples/logistic_regression.py similarity index 93% rename from pyspark/examples/lr.py rename to pyspark/examples/logistic_regression.py index 5fca0266b8..f13698a86f 100755 --- a/pyspark/examples/lr.py +++ b/pyspark/examples/logistic_regression.py @@ -7,7 +7,7 @@ from os.path import realpath import sys import numpy as np -from pyspark.context import SparkContext +from pyspark import SparkContext N = 100000 # Number of data points @@ -32,7 +32,7 @@ def generateData(): if __name__ == "__main__": if len(sys.argv) == 1: print >> sys.stderr, \ - "Usage: PythonLR []" + "Usage: PythonLR []" exit(-1) sc = SparkContext(sys.argv[1], "PythonLR", pyFiles=[realpath(__file__)]) slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2 diff --git a/pyspark/examples/pi.py b/pyspark/examples/pi.py index 348bbc5dce..127cba029b 100644 --- a/pyspark/examples/pi.py +++ b/pyspark/examples/pi.py @@ -1,13 +1,14 @@ import sys from random import random from operator import add -from pyspark.context import SparkContext + +from pyspark import SparkContext if __name__ == "__main__": if len(sys.argv) == 1: print >> sys.stderr, \ - "Usage: PythonPi []" + "Usage: PythonPi []" exit(-1) sc = SparkContext(sys.argv[1], "PythonPi") slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2 diff --git a/pyspark/examples/tc.py b/pyspark/examples/transitive_closure.py similarity index 94% rename from pyspark/examples/tc.py rename to pyspark/examples/transitive_closure.py index 9630e72b47..73f7f8fbaf 100644 --- a/pyspark/examples/tc.py +++ b/pyspark/examples/transitive_closure.py @@ -1,6 +1,7 @@ import sys from random import Random -from pyspark.context import SparkContext + +from pyspark import SparkContext numEdges = 200 numVertices = 100 @@ -20,7 +21,7 @@ def generateGraph(): if __name__ == "__main__": if len(sys.argv) == 1: print >> sys.stderr, \ - "Usage: PythonTC []" + "Usage: PythonTC []" exit(-1) sc = SparkContext(sys.argv[1], "PythonTC") slices = sys.argv[2] if len(sys.argv) > 2 else 2 diff --git a/pyspark/examples/wordcount.py 
b/pyspark/examples/wordcount.py index 8365c070e8..857160624b 100644 --- a/pyspark/examples/wordcount.py +++ b/pyspark/examples/wordcount.py @@ -1,6 +1,8 @@ import sys from operator import add -from pyspark.context import SparkContext + +from pyspark import SparkContext + if __name__ == "__main__": if len(sys.argv) < 3: diff --git a/pyspark/pyspark/__init__.py b/pyspark/pyspark/__init__.py index 8f8402b62b..1ab360a666 100644 --- a/pyspark/pyspark/__init__.py +++ b/pyspark/pyspark/__init__.py @@ -1,9 +1,20 @@ +""" +PySpark is a Python API for Spark. + +Public classes: + + - L{SparkContext} + Main entry point for Spark functionality. + - L{RDD} + A Resilient Distributed Dataset (RDD), the basic abstraction in Spark. +""" import sys import os sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "pyspark/lib/py4j0.7.egg")) from pyspark.context import SparkContext +from pyspark.rdd import RDD -__all__ = ["SparkContext"] +__all__ = ["SparkContext", "RDD"] From b58340dbd9a741331fc4c3829b08c093560056c2 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Tue, 1 Jan 2013 14:48:45 -0800 Subject: [PATCH 043/291] Rename top-level 'pyspark' directory to 'python' --- .../src/main/scala/spark/api/python/PythonRDD.scala | 2 +- docs/_plugins/copy_api_dirs.rb | 8 ++++---- pyspark-shell | 3 +++ pyspark/pyspark-shell | 3 --- {pyspark => python}/.gitignore | 0 {pyspark => python}/epydoc.conf | 0 {pyspark => python}/examples/kmeans.py | 0 {pyspark => python}/examples/logistic_regression.py | 0 {pyspark => python}/examples/pi.py | 0 {pyspark => python}/examples/transitive_closure.py | 0 {pyspark => python}/examples/wordcount.py | 0 {pyspark => python}/lib/PY4J_LICENSE.txt | 0 {pyspark => python}/lib/PY4J_VERSION.txt | 0 {pyspark => python}/lib/py4j0.7.egg | Bin {pyspark => python}/lib/py4j0.7.jar | Bin {pyspark => python}/pyspark/__init__.py | 2 +- {pyspark => python}/pyspark/broadcast.py | 0 {pyspark => python}/pyspark/cloudpickle.py | 0 {pyspark => python}/pyspark/context.py | 0 {pyspark => python}/pyspark/java_gateway.py | 0 {pyspark => python}/pyspark/join.py | 0 {pyspark => python}/pyspark/rdd.py | 0 {pyspark => python}/pyspark/serializers.py | 0 {pyspark => python}/pyspark/shell.py | 0 {pyspark => python}/pyspark/worker.py | 0 run | 2 +- pyspark/run-pyspark => run-pyspark | 4 ++-- run2.cmd | 2 +- 28 files changed, 13 insertions(+), 13 deletions(-) create mode 100755 pyspark-shell delete mode 100755 pyspark/pyspark-shell rename {pyspark => python}/.gitignore (100%) rename {pyspark => python}/epydoc.conf (100%) rename {pyspark => python}/examples/kmeans.py (100%) rename {pyspark => python}/examples/logistic_regression.py (100%) rename {pyspark => python}/examples/pi.py (100%) rename {pyspark => python}/examples/transitive_closure.py (100%) rename {pyspark => python}/examples/wordcount.py (100%) rename {pyspark => python}/lib/PY4J_LICENSE.txt (100%) rename {pyspark => python}/lib/PY4J_VERSION.txt (100%) rename {pyspark => python}/lib/py4j0.7.egg (100%) rename {pyspark => python}/lib/py4j0.7.jar (100%) rename {pyspark => python}/pyspark/__init__.py (82%) rename {pyspark => python}/pyspark/broadcast.py (100%) rename {pyspark => python}/pyspark/cloudpickle.py (100%) rename {pyspark => python}/pyspark/context.py (100%) rename {pyspark => python}/pyspark/java_gateway.py (100%) rename {pyspark => python}/pyspark/join.py (100%) rename {pyspark => python}/pyspark/rdd.py (100%) rename {pyspark => python}/pyspark/serializers.py (100%) rename {pyspark => python}/pyspark/shell.py (100%) rename {pyspark => 
python}/pyspark/worker.py (100%) rename pyspark/run-pyspark => run-pyspark (86%) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 19a039e330..cf60d14f03 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -38,7 +38,7 @@ private[spark] class PythonRDD[T: ClassManifest]( override def compute(split: Split, context: TaskContext): Iterator[Array[Byte]] = { val SPARK_HOME = new ProcessBuilder().environment().get("SPARK_HOME") - val pb = new ProcessBuilder(Seq(pythonExec, SPARK_HOME + "/pyspark/pyspark/worker.py")) + val pb = new ProcessBuilder(Seq(pythonExec, SPARK_HOME + "/python/pyspark/worker.py")) // Add the environmental variables to the process. val currentEnvVars = pb.environment() diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index 577f3ebe70..c9ce589c1b 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -30,8 +30,8 @@ if ENV['SKIP_SCALADOC'] != '1' end if ENV['SKIP_EPYDOC'] != '1' - puts "Moving to pyspark directory and building epydoc." - cd("../pyspark") + puts "Moving to python directory and building epydoc." + cd("../python") puts `epydoc --config epydoc.conf` puts "Moving back into docs dir." @@ -40,8 +40,8 @@ if ENV['SKIP_EPYDOC'] != '1' puts "echo making directory pyspark" mkdir_p "pyspark" - puts "cp -r ../pyspark/docs/. api/pyspark" - cp_r("../pyspark/docs/.", "api/pyspark") + puts "cp -r ../python/docs/. api/pyspark" + cp_r("../python/docs/.", "api/pyspark") cd("..") end diff --git a/pyspark-shell b/pyspark-shell new file mode 100755 index 0000000000..27aaac3a26 --- /dev/null +++ b/pyspark-shell @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +FWDIR="`dirname $0`" +exec $FWDIR/run-pyspark $FWDIR/python/pyspark/shell.py "$@" diff --git a/pyspark/pyspark-shell b/pyspark/pyspark-shell deleted file mode 100755 index e3736826e8..0000000000 --- a/pyspark/pyspark-shell +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash -FWDIR="`dirname $0`" -exec $FWDIR/run-pyspark $FWDIR/pyspark/shell.py "$@" diff --git a/pyspark/.gitignore b/python/.gitignore similarity index 100% rename from pyspark/.gitignore rename to python/.gitignore diff --git a/pyspark/epydoc.conf b/python/epydoc.conf similarity index 100% rename from pyspark/epydoc.conf rename to python/epydoc.conf diff --git a/pyspark/examples/kmeans.py b/python/examples/kmeans.py similarity index 100% rename from pyspark/examples/kmeans.py rename to python/examples/kmeans.py diff --git a/pyspark/examples/logistic_regression.py b/python/examples/logistic_regression.py similarity index 100% rename from pyspark/examples/logistic_regression.py rename to python/examples/logistic_regression.py diff --git a/pyspark/examples/pi.py b/python/examples/pi.py similarity index 100% rename from pyspark/examples/pi.py rename to python/examples/pi.py diff --git a/pyspark/examples/transitive_closure.py b/python/examples/transitive_closure.py similarity index 100% rename from pyspark/examples/transitive_closure.py rename to python/examples/transitive_closure.py diff --git a/pyspark/examples/wordcount.py b/python/examples/wordcount.py similarity index 100% rename from pyspark/examples/wordcount.py rename to python/examples/wordcount.py diff --git a/pyspark/lib/PY4J_LICENSE.txt b/python/lib/PY4J_LICENSE.txt similarity index 100% rename from pyspark/lib/PY4J_LICENSE.txt rename to python/lib/PY4J_LICENSE.txt diff --git a/pyspark/lib/PY4J_VERSION.txt 
b/python/lib/PY4J_VERSION.txt similarity index 100% rename from pyspark/lib/PY4J_VERSION.txt rename to python/lib/PY4J_VERSION.txt diff --git a/pyspark/lib/py4j0.7.egg b/python/lib/py4j0.7.egg similarity index 100% rename from pyspark/lib/py4j0.7.egg rename to python/lib/py4j0.7.egg diff --git a/pyspark/lib/py4j0.7.jar b/python/lib/py4j0.7.jar similarity index 100% rename from pyspark/lib/py4j0.7.jar rename to python/lib/py4j0.7.jar diff --git a/pyspark/pyspark/__init__.py b/python/pyspark/__init__.py similarity index 82% rename from pyspark/pyspark/__init__.py rename to python/pyspark/__init__.py index 1ab360a666..c595ae0842 100644 --- a/pyspark/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -10,7 +10,7 @@ Public classes: """ import sys import os -sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "pyspark/lib/py4j0.7.egg")) +sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j0.7.egg")) from pyspark.context import SparkContext diff --git a/pyspark/pyspark/broadcast.py b/python/pyspark/broadcast.py similarity index 100% rename from pyspark/pyspark/broadcast.py rename to python/pyspark/broadcast.py diff --git a/pyspark/pyspark/cloudpickle.py b/python/pyspark/cloudpickle.py similarity index 100% rename from pyspark/pyspark/cloudpickle.py rename to python/pyspark/cloudpickle.py diff --git a/pyspark/pyspark/context.py b/python/pyspark/context.py similarity index 100% rename from pyspark/pyspark/context.py rename to python/pyspark/context.py diff --git a/pyspark/pyspark/java_gateway.py b/python/pyspark/java_gateway.py similarity index 100% rename from pyspark/pyspark/java_gateway.py rename to python/pyspark/java_gateway.py diff --git a/pyspark/pyspark/join.py b/python/pyspark/join.py similarity index 100% rename from pyspark/pyspark/join.py rename to python/pyspark/join.py diff --git a/pyspark/pyspark/rdd.py b/python/pyspark/rdd.py similarity index 100% rename from pyspark/pyspark/rdd.py rename to python/pyspark/rdd.py diff --git a/pyspark/pyspark/serializers.py b/python/pyspark/serializers.py similarity index 100% rename from pyspark/pyspark/serializers.py rename to python/pyspark/serializers.py diff --git a/pyspark/pyspark/shell.py b/python/pyspark/shell.py similarity index 100% rename from pyspark/pyspark/shell.py rename to python/pyspark/shell.py diff --git a/pyspark/pyspark/worker.py b/python/pyspark/worker.py similarity index 100% rename from pyspark/pyspark/worker.py rename to python/pyspark/worker.py diff --git a/run b/run index ed788c4db3..08e2b2434b 100755 --- a/run +++ b/run @@ -63,7 +63,7 @@ CORE_DIR="$FWDIR/core" REPL_DIR="$FWDIR/repl" EXAMPLES_DIR="$FWDIR/examples" BAGEL_DIR="$FWDIR/bagel" -PYSPARK_DIR="$FWDIR/pyspark" +PYSPARK_DIR="$FWDIR/python" # Build up classpath CLASSPATH="$SPARK_CLASSPATH" diff --git a/pyspark/run-pyspark b/run-pyspark similarity index 86% rename from pyspark/run-pyspark rename to run-pyspark index 4d10fbea8b..deb0d708b3 100755 --- a/pyspark/run-pyspark +++ b/run-pyspark @@ -1,7 +1,7 @@ #!/usr/bin/env bash # Figure out where the Scala framework is installed -FWDIR="$(cd `dirname $0`; cd ../; pwd)" +FWDIR="$(cd `dirname $0`; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" @@ -18,7 +18,7 @@ fi export PYSPARK_PYTHON # Add the PySpark classes to the Python path: -export PYTHONPATH=$SPARK_HOME/pyspark/:$PYTHONPATH +export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH # Launch with `scala` by default: if [[ "$SPARK_LAUNCH_WITH_SCALA" != "0" ]] ; then diff --git a/run2.cmd b/run2.cmd index 9c50804e69..83464b1166 
100644 --- a/run2.cmd +++ b/run2.cmd @@ -34,7 +34,7 @@ set CORE_DIR=%FWDIR%core set REPL_DIR=%FWDIR%repl set EXAMPLES_DIR=%FWDIR%examples set BAGEL_DIR=%FWDIR%bagel -set PYSPARK_DIR=%FWDIR%pyspark +set PYSPARK_DIR=%FWDIR%python rem Build up classpath set CLASSPATH=%SPARK_CLASSPATH%;%MESOS_CLASSPATH%;%FWDIR%conf;%CORE_DIR%\target\scala-%SCALA_VERSION%\classes From ce9f1bbe20eff794cd1d588dc88f109d32588cfe Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Tue, 1 Jan 2013 21:25:49 -0800 Subject: [PATCH 044/291] Add `pyspark` script to replace the other scripts. Expand the PySpark programming guide. --- docs/python-programming-guide.md | 49 ++++++++++++++++++++++++++++---- docs/quick-start.md | 4 +-- run-pyspark => pyspark | 4 +++ pyspark-shell | 3 -- python/pyspark/shell.py | 36 +++++++---------------- python/run-tests | 9 ++++++ 6 files changed, 69 insertions(+), 36 deletions(-) rename run-pyspark => pyspark (80%) delete mode 100755 pyspark-shell create mode 100755 python/run-tests diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md index d88d4eb42d..d963551296 100644 --- a/docs/python-programming-guide.md +++ b/docs/python-programming-guide.md @@ -24,6 +24,35 @@ There are a few key differences between the Python and Scala APIs: - `sample` - `sort` +In PySpark, RDDs support the same methods as their Scala counterparts but take Python functions and return Python collection types. +Short functions can be passed to RDD methods using Python's [`lambda`](http://www.diveintopython.net/power_of_introspection/lambda_functions.html) syntax: + +{% highlight python %} +logData = sc.textFile(logFile).cache() +errors = logData.filter(lambda s: 'ERROR' in s.split()) +{% endhighlight %} + +You can also pass functions that are defined using the `def` keyword; this is useful for more complicated functions that cannot be expressed using `lambda`: + +{% highlight python %} +def is_error(line): + return 'ERROR' in line.split() +errors = logData.filter(is_error) +{% endhighlight %} + +Functions can access objects in enclosing scopes, although modifications to those objects within RDD methods will not be propagated to other tasks: + +{% highlight python %} +error_keywords = ["Exception", "Error"] +def is_error(line): + words = line.split() + return any(keyword in words for keyword in error_keywords) +errors = logData.filter(is_error) +{% endhighlight %} + +PySpark will automatically ship these functions to workers, along with any objects that they reference. +Instances of classes will be serialized and shipped to workers by PySpark, but classes themselves cannot be automatically distributed to workers. +The [Standalone Use](#standalone-use) section describes how to ship code dependencies to workers. # Installing and Configuring PySpark @@ -34,13 +63,14 @@ By default, PySpark's scripts will run programs using `python`; an alternate Pyt All of PySpark's library dependencies, including [Py4J](http://py4j.sourceforge.net/), are bundled with PySpark and automatically imported. -Standalone PySpark jobs should be run using the `run-pyspark` script, which automatically configures the Java and Python environmnt using the settings in `conf/spark-env.sh`. +Standalone PySpark jobs should be run using the `pyspark` script, which automatically configures the Java and Python environment using the settings in `conf/spark-env.sh`. The script automatically adds the `pyspark` package to the `PYTHONPATH`. 
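The closure-shipping rule described earlier in this guide addition (enclosing-scope objects are copied out to workers, and worker-side modifications are not propagated back to the driver) is easy to demonstrate. A hedged sketch, with `logs.txt` standing in for any input file:

    from pyspark import SparkContext

    sc = SparkContext("local", "ClosureDemo")
    counter = 0

    def is_error(line):
        global counter
        counter += 1                   # increments a copy shipped to the worker process
        return "ERROR" in line.split()

    errors = sc.textFile("logs.txt").filter(is_error)
    print errors.count()               # the filter itself works as expected
    print counter                      # still 0 on the driver; worker-side updates are lost
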
# Interactive Use -PySpark's `pyspark-shell` script provides a simple way to learn the API: +The `pyspark` script launches a Python interpreter that is configured to run PySpark jobs. +When run without any input files, `pyspark` launches a shell that can be used explore data interactively, which is a simple way to learn the API: {% highlight python %} >>> words = sc.textFile("/usr/share/dict/words") @@ -48,9 +78,18 @@ PySpark's `pyspark-shell` script provides a simple way to learn the API: [u'spar', u'sparable', u'sparada', u'sparadrap', u'sparagrass'] {% endhighlight %} +By default, the `pyspark` shell creates SparkContext that runs jobs locally. +To connect to a non-local cluster, set the `MASTER` environment variable. +For example, to use the `pyspark` shell with a [standalone Spark cluster](spark-standalone.html): + +{% highlight shell %} +$ MASTER=spark://IP:PORT ./pyspark +{% endhighlight %} + + # Standalone Use -PySpark can also be used from standalone Python scripts by creating a SparkContext in the script and running the script using the `run-pyspark` script in the `pyspark` directory. +PySpark can also be used from standalone Python scripts by creating a SparkContext in your script and running the script using `pyspark`. The Quick Start guide includes a [complete example](quick-start.html#a-standalone-job-in-python) of a standalone Python job. Code dependencies can be deployed by listing them in the `pyFiles` option in the SparkContext constructor: @@ -65,8 +104,8 @@ Code dependencies can be added to an existing SparkContext using its `addPyFile( # Where to Go from Here -PySpark includes several sample programs using the Python API in `pyspark/examples`. -You can run them by passing the files to the `pyspark-run` script included in PySpark -- for example `./pyspark-run examples/wordcount.py`. +PySpark includes several sample programs using the Python API in `python/examples`. +You can run them by passing the files to the `pyspark` script -- for example `./pyspark python/examples/wordcount.py`. Each example program prints usage help when run without any arguments. We currently provide [API documentation](api/pyspark/index.html) for the Python API as Epydoc. diff --git a/docs/quick-start.md b/docs/quick-start.md index 8c25df5486..2c7cfbed25 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -258,11 +258,11 @@ We can pass Python functions to Spark, which are automatically serialized along For jobs that use custom classes or third-party libraries, we can add those code dependencies to SparkContext to ensure that they will be available on remote machines; this is described in more detail in the [Python programming guide](python-programming-guide). `SimpleJob` is simple enough that we do not need to specify any code dependencies. -We can run this job using the `run-pyspark` script in `$SPARK_HOME/pyspark`: +We can run this job using the `pyspark` script: {% highlight python %} $ cd $SPARK_HOME -$ ./pyspark/run-pyspark SimpleJob.py +$ ./pyspark SimpleJob.py ... 
Lines with a: 8422, Lines with b: 1836 {% endhighlight python %} diff --git a/run-pyspark b/pyspark similarity index 80% rename from run-pyspark rename to pyspark index deb0d708b3..9e89d51ba2 100755 --- a/run-pyspark +++ b/pyspark @@ -20,6 +20,10 @@ export PYSPARK_PYTHON # Add the PySpark classes to the Python path: export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH +# Load the PySpark shell.py script when ./pyspark is used interactively: +export OLD_PYTHONSTARTUP=$PYTHONSTARTUP +export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py + # Launch with `scala` by default: if [[ "$SPARK_LAUNCH_WITH_SCALA" != "0" ]] ; then export SPARK_LAUNCH_WITH_SCALA=1 diff --git a/pyspark-shell b/pyspark-shell deleted file mode 100755 index 27aaac3a26..0000000000 --- a/pyspark-shell +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash -FWDIR="`dirname $0`" -exec $FWDIR/run-pyspark $FWDIR/python/pyspark/shell.py "$@" diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index bd39b0283f..7e6ad3aa76 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -1,33 +1,17 @@ """ An interactive shell. -""" -import optparse # I prefer argparse, but it's not included with Python < 2.7 -import code -import sys +This fle is designed to be launched as a PYTHONSTARTUP script. +""" +import os from pyspark.context import SparkContext -def main(master='local', ipython=False): - sc = SparkContext(master, 'PySparkShell') - user_ns = {'sc' : sc} - banner = "Spark context avaiable as sc." - if ipython: - import IPython - IPython.embed(user_ns=user_ns, banner2=banner) - else: - print banner - code.interact(local=user_ns) +sc = SparkContext(os.environ.get("MASTER", "local"), "PySparkShell") +print "Spark context avaiable as sc." - -if __name__ == '__main__': - usage = "usage: %prog [options] master" - parser = optparse.OptionParser(usage=usage) - parser.add_option("-i", "--ipython", help="Run IPython shell", - action="store_true") - (options, args) = parser.parse_args() - if len(sys.argv) > 1: - master = args[0] - else: - master = 'local' - main(master, options.ipython) +# The ./pyspark script stores the old PYTHONSTARTUP value in OLD_PYTHONSTARTUP, +# which allows us to execute the user's PYTHONSTARTUP file: +_pythonstartup = os.environ.get('OLD_PYTHONSTARTUP') +if _pythonstartup and os.path.isfile(_pythonstartup): + execfile(_pythonstartup) diff --git a/python/run-tests b/python/run-tests new file mode 100755 index 0000000000..da9e24cb1f --- /dev/null +++ b/python/run-tests @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +# Figure out where the Scala framework is installed +FWDIR="$(cd `dirname $0`; cd ../; pwd)" + +$FWDIR/pyspark pyspark/rdd.py +$FWDIR/pyspark -m doctest pyspark/broadcast.py + +# TODO: in the long-run, it would be nice to use a test runner like `nose`. From 33beba39656fc64984db09a82fc69ca4edcc02d4 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 3 Jan 2013 14:52:21 -0800 Subject: [PATCH 045/291] Change PySpark RDD.take() to not call iterator(). 
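The `run-tests` script above exercises `rdd.py` directly because its docstrings double as doctests, and those doctests (like the `take()` examples in the patch below) reference a live `sc`. A common way to wire that up is to pass a SparkContext into the doctest namespace; this is only a hedged sketch, since the exact test helper inside `rdd.py` is not reproduced in these patches:

    import doctest
    from pyspark import SparkContext

    def _test():
        # Give the doctests the `sc` they reference; 'local' keeps everything on one machine.
        globs = {'sc': SparkContext('local', 'PySparkDocTests')}
        (failure_count, test_count) = doctest.testmod(globs=globs)
        if failure_count:
            exit(-1)

    if __name__ == "__main__":
        _test()
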
--- core/src/main/scala/spark/api/python/PythonRDD.scala | 4 ++++ python/pyspark/context.py | 1 + python/pyspark/rdd.py | 11 +++++------ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index cf60d14f03..79d824d494 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -10,6 +10,7 @@ import spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} import spark.broadcast.Broadcast import spark._ import spark.rdd.PipedRDD +import java.util private[spark] class PythonRDD[T: ClassManifest]( @@ -216,6 +217,9 @@ private[spark] object PythonRDD { } file.close() } + + def takePartition[T](rdd: RDD[T], partition: Int): java.util.Iterator[T] = + rdd.context.runJob(rdd, ((x: Iterator[T]) => x), Seq(partition), true).head } private object Pickle { diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 6172d69dcf..4439356c1f 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -21,6 +21,7 @@ class SparkContext(object): jvm = gateway.jvm _readRDDFromPickleFile = jvm.PythonRDD.readRDDFromPickleFile _writeIteratorToPickleFile = jvm.PythonRDD.writeIteratorToPickleFile + _takePartition = jvm.PythonRDD.takePartition def __init__(self, master, jobName, sparkHome=None, pyFiles=None, environment=None, batchSize=1024): diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index cbffb6cc1f..4ba417b2a2 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -328,18 +328,17 @@ class RDD(object): a lot of partitions are required. In that case, use L{collect} to get the whole RDD instead. - >>> sc.parallelize([2, 3, 4, 5, 6]).take(2) + >>> sc.parallelize([2, 3, 4, 5, 6]).cache().take(2) [2, 3] >>> sc.parallelize([2, 3, 4, 5, 6]).take(10) [2, 3, 4, 5, 6] """ items = [] - splits = self._jrdd.splits() - taskContext = self.ctx.jvm.spark.TaskContext(0, 0, 0) - while len(items) < num and splits: - split = splits.pop(0) - iterator = self._jrdd.iterator(split, taskContext) + for partition in range(self._jrdd.splits().size()): + iterator = self.ctx._takePartition(self._jrdd.rdd(), partition) items.extend(self._collect_iterator_through_file(iterator)) + if len(items) >= num: + break return items[:num] def first(self): From 8d57c78c83f74e45ce3c119e2e3915d5eac264e7 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Sat, 5 Jan 2013 10:54:05 -0600 Subject: [PATCH 046/291] Add PairRDDFunctions.keys and values. --- core/src/main/scala/spark/PairRDDFunctions.scala | 10 ++++++++++ core/src/test/scala/spark/ShuffleSuite.scala | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala index 413c944a66..ce48cea903 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/spark/PairRDDFunctions.scala @@ -615,6 +615,16 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( writer.cleanup() } + /** + * Return an RDD with the keys of each tuple. + */ + def keys: RDD[K] = self.map(_._1) + + /** + * Return an RDD with the values of each tuple. 
+ */ + def values: RDD[V] = self.map(_._2) + private[spark] def getKeyClass() = implicitly[ClassManifest[K]].erasure private[spark] def getValueClass() = implicitly[ClassManifest[V]].erasure diff --git a/core/src/test/scala/spark/ShuffleSuite.scala b/core/src/test/scala/spark/ShuffleSuite.scala index 8170100f1d..5a867016f2 100644 --- a/core/src/test/scala/spark/ShuffleSuite.scala +++ b/core/src/test/scala/spark/ShuffleSuite.scala @@ -216,6 +216,13 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with BeforeAndAfter { // Test that a shuffle on the file works, because this used to be a bug assert(file.map(line => (line, 1)).reduceByKey(_ + _).collect().toList === Nil) } + + test("kesy and values") { + sc = new SparkContext("local", "test") + val rdd = sc.parallelize(Array((1, "a"), (2, "b"))) + assert(rdd.keys.collect().toList === List(1, 2)) + assert(rdd.values.collect().toList === List("a", "b")) + } } object ShuffleSuite { From f4e6b9361ffeec1018d5834f09db9fd86f2ba7bd Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Fri, 4 Jan 2013 22:43:22 -0600 Subject: [PATCH 047/291] Add RDD.collect(PartialFunction). --- core/src/main/scala/spark/RDD.scala | 7 +++++++ core/src/test/scala/spark/RDDSuite.scala | 1 + 2 files changed, 8 insertions(+) diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 7e38583391..5163c80134 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -329,6 +329,13 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial */ def toArray(): Array[T] = collect() + /** + * Return an RDD that contains all matching values by applying `f`. + */ + def collect[U: ClassManifest](f: PartialFunction[T, U]): RDD[U] = { + filter(f.isDefinedAt).map(f) + } + /** * Reduces the elements of this RDD using the specified associative binary operator. */ diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 45e6c5f840..872b06fd08 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -35,6 +35,7 @@ class RDDSuite extends FunSuite with BeforeAndAfter { assert(nums.flatMap(x => 1 to x).collect().toList === List(1, 1, 2, 1, 2, 3, 1, 2, 3, 4)) assert(nums.union(nums).collect().toList === List(1, 2, 3, 4, 1, 2, 3, 4)) assert(nums.glom().map(_.toList).collect().toList === List(List(1, 2), List(3, 4))) + assert(nums.collect({ case i if i >= 3 => i.toString }).collect().toList === List("3", "4")) val partitionSums = nums.mapPartitions(iter => Iterator(iter.reduceLeft(_ + _))) assert(partitionSums.collect().toList === List(3, 7)) From 6a0db3b449a829f3e5cdf7229f6ee564268be1df Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Sat, 5 Jan 2013 12:56:17 -0600 Subject: [PATCH 048/291] Fix typo. 
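For comparison with the Scala additions above, both `keys`/`values` and `collect(PartialFunction)` reduce to one-liners against the PySpark RDD API used throughout these patches. A sketch (the tuple-unpacking lambdas follow the same Python 2 style as the kmeans example earlier):

    from pyspark import SparkContext

    sc = SparkContext("local", "PairHelpersDemo")
    pairs = sc.parallelize([(1, "a"), (2, "b")])
    print pairs.map(lambda (k, v): k).collect()        # [1, 2]     -- analogue of keys
    print pairs.map(lambda (k, v): v).collect()        # ['a', 'b'] -- analogue of values

    nums = sc.parallelize([1, 2, 3, 4])
    # collect(PartialFunction) becomes filter-then-map on the Python side
    print nums.filter(lambda i: i >= 3).map(str).collect()   # ['3', '4']
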
--- core/src/test/scala/spark/ShuffleSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/spark/ShuffleSuite.scala b/core/src/test/scala/spark/ShuffleSuite.scala index 5a867016f2..bebb8ebe86 100644 --- a/core/src/test/scala/spark/ShuffleSuite.scala +++ b/core/src/test/scala/spark/ShuffleSuite.scala @@ -217,7 +217,7 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with BeforeAndAfter { assert(file.map(line => (line, 1)).reduceByKey(_ + _).collect().toList === Nil) } - test("kesy and values") { + test("keys and values") { sc = new SparkContext("local", "test") val rdd = sc.parallelize(Array((1, "a"), (2, "b"))) assert(rdd.keys.collect().toList === List(1, 2)) From 1fdb6946b5d076ed0f1b4d2bca2a20b6cd22cbc3 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Sat, 5 Jan 2013 13:07:59 -0600 Subject: [PATCH 049/291] Add RDD.tupleBy. --- core/src/main/scala/spark/RDD.scala | 7 +++++++ core/src/test/scala/spark/RDDSuite.scala | 1 + 2 files changed, 8 insertions(+) diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 7e38583391..7aa4b0a173 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -510,6 +510,13 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial .saveAsSequenceFile(path) } + /** + * Tuples the elements of this RDD by applying `f`. + */ + def tupleBy[K](f: T => K): RDD[(K, T)] = { + map(x => (f(x), x)) + } + /** A private method for tests, to look at the contents of each partition */ private[spark] def collectPartitions(): Array[Array[T]] = { sc.runJob(this, (iter: Iterator[T]) => iter.toArray) diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 45e6c5f840..7832884224 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -35,6 +35,7 @@ class RDDSuite extends FunSuite with BeforeAndAfter { assert(nums.flatMap(x => 1 to x).collect().toList === List(1, 1, 2, 1, 2, 3, 1, 2, 3, 4)) assert(nums.union(nums).collect().toList === List(1, 2, 3, 4, 1, 2, 3, 4)) assert(nums.glom().map(_.toList).collect().toList === List(List(1, 2), List(3, 4))) + assert(nums.tupleBy(_.toString).collect().toList === List(("1", 1), ("2", 2), ("3", 3), ("4", 4))) val partitionSums = nums.mapPartitions(iter => Iterator(iter.reduceLeft(_ + _))) assert(partitionSums.collect().toList === List(3, 7)) From 86af64b0a6fde5a6418727a77b43bdfeda1b81cd Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sat, 5 Jan 2013 20:54:08 -0500 Subject: [PATCH 050/291] Fix Accumulators in Java, and add a test for them --- core/src/main/scala/spark/Accumulators.scala | 18 +++++++- core/src/main/scala/spark/SparkContext.scala | 7 +-- .../spark/api/java/JavaSparkContext.scala | 23 ++++++---- core/src/test/scala/spark/JavaAPISuite.java | 44 +++++++++++++++++++ 4 files changed, 79 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/spark/Accumulators.scala b/core/src/main/scala/spark/Accumulators.scala index bacd0ace37..6280f25391 100644 --- a/core/src/main/scala/spark/Accumulators.scala +++ b/core/src/main/scala/spark/Accumulators.scala @@ -38,14 +38,28 @@ class Accumulable[R, T] ( */ def += (term: T) { value_ = param.addAccumulator(value_, term) } + /** + * Add more data to this accumulator / accumulable + * @param term the data to add + */ + def add(term: T) { value_ = param.addAccumulator(value_, term) } + /** * Merge two accumulable objects together - * + * * 
Normally, a user will not want to use this version, but will instead call `+=`. - * @param term the other Accumulable that will get merged with this + * @param term the other `R` that will get merged with this */ def ++= (term: R) { value_ = param.addInPlace(value_, term)} + /** + * Merge two accumulable objects together + * + * Normally, a user will not want to use this version, but will instead call `add`. + * @param term the other `R` that will get merged with this + */ + def merge(term: R) { value_ = param.addInPlace(value_, term)} + /** * Access the accumulator's current value; only allowed on master. */ diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 4fd81bc63b..bbf8272eb3 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -382,11 +382,12 @@ class SparkContext( new Accumulator(initialValue, param) /** - * Create an [[spark.Accumulable]] shared variable, with a `+=` method + * Create an [[spark.Accumulable]] shared variable, to which tasks can add values with `+=`. + * Only the master can access the accumuable's `value`. * @tparam T accumulator type * @tparam R type that can be added to the accumulator */ - def accumulable[T,R](initialValue: T)(implicit param: AccumulableParam[T,R]) = + def accumulable[T, R](initialValue: T)(implicit param: AccumulableParam[T, R]) = new Accumulable(initialValue, param) /** @@ -404,7 +405,7 @@ class SparkContext( * Broadcast a read-only variable to the cluster, returning a [[spark.Broadcast]] object for * reading it in distributed functions. The variable will be sent to each cluster only once. */ - def broadcast[T](value: T) = env.broadcastManager.newBroadcast[T] (value, isLocal) + def broadcast[T](value: T) = env.broadcastManager.newBroadcast[T](value, isLocal) /** * Add a file to be downloaded into the working directory of this Spark job on every node. diff --git a/core/src/main/scala/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/spark/api/java/JavaSparkContext.scala index b7725313c4..bf9ad7a200 100644 --- a/core/src/main/scala/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/spark/api/java/JavaSparkContext.scala @@ -10,7 +10,7 @@ import org.apache.hadoop.mapred.InputFormat import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} -import spark.{Accumulator, AccumulatorParam, RDD, SparkContext} +import spark.{Accumulable, AccumulableParam, Accumulator, AccumulatorParam, RDD, SparkContext} import spark.SparkContext.IntAccumulatorParam import spark.SparkContext.DoubleAccumulatorParam import spark.broadcast.Broadcast @@ -265,25 +265,32 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork /** * Create an [[spark.Accumulator]] integer variable, which tasks can "add" values - * to using the `+=` method. Only the master can access the accumulator's `value`. + * to using the `add` method. Only the master can access the accumulator's `value`. */ - def intAccumulator(initialValue: Int): Accumulator[Int] = - sc.accumulator(initialValue)(IntAccumulatorParam) + def intAccumulator(initialValue: Int): Accumulator[java.lang.Integer] = + sc.accumulator(initialValue)(IntAccumulatorParam).asInstanceOf[Accumulator[java.lang.Integer]] /** * Create an [[spark.Accumulator]] double variable, which tasks can "add" values - * to using the `+=` method. Only the master can access the accumulator's `value`. + * to using the `add` method. 
Only the master can access the accumulator's `value`. */ - def doubleAccumulator(initialValue: Double): Accumulator[Double] = - sc.accumulator(initialValue)(DoubleAccumulatorParam) + def doubleAccumulator(initialValue: Double): Accumulator[java.lang.Double] = + sc.accumulator(initialValue)(DoubleAccumulatorParam).asInstanceOf[Accumulator[java.lang.Double]] /** * Create an [[spark.Accumulator]] variable of a given type, which tasks can "add" values - * to using the `+=` method. Only the master can access the accumulator's `value`. + * to using the `add` method. Only the master can access the accumulator's `value`. */ def accumulator[T](initialValue: T, accumulatorParam: AccumulatorParam[T]): Accumulator[T] = sc.accumulator(initialValue)(accumulatorParam) + /** + * Create an [[spark.Accumulable]] shared variable of the given type, to which tasks can + * "add" values with `add`. Only the master can access the accumuable's `value`. + */ + def accumulable[T, R](initialValue: T, param: AccumulableParam[T, R]): Accumulable[T, R] = + sc.accumulable(initialValue)(param) + /** * Broadcast a read-only variable to the cluster, returning a [[spark.Broadcast]] object for * reading it in distributed functions. The variable will be sent to each cluster only once. diff --git a/core/src/test/scala/spark/JavaAPISuite.java b/core/src/test/scala/spark/JavaAPISuite.java index 33d5fc2d89..b99e790093 100644 --- a/core/src/test/scala/spark/JavaAPISuite.java +++ b/core/src/test/scala/spark/JavaAPISuite.java @@ -581,4 +581,48 @@ public class JavaAPISuite implements Serializable { JavaPairRDD zipped = rdd.zip(doubles); zipped.count(); } + + @Test + public void accumulators() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); + + final Accumulator intAccum = sc.intAccumulator(10); + rdd.foreach(new VoidFunction() { + public void call(Integer x) { + intAccum.add(x); + } + }); + Assert.assertEquals((Integer) 25, intAccum.value()); + + final Accumulator doubleAccum = sc.doubleAccumulator(10.0); + rdd.foreach(new VoidFunction() { + public void call(Integer x) { + doubleAccum.add((double) x); + } + }); + Assert.assertEquals((Double) 25.0, doubleAccum.value()); + + // Try a custom accumulator type + AccumulatorParam floatAccumulatorParam = new AccumulatorParam() { + public Float addInPlace(Float r, Float t) { + return r + t; + } + + public Float addAccumulator(Float r, Float t) { + return r + t; + } + + public Float zero(Float initialValue) { + return 0.0f; + } + }; + + final Accumulator floatAccum = sc.accumulator((Float) 10.0f, floatAccumulatorParam); + rdd.foreach(new VoidFunction() { + public void call(Integer x) { + floatAccum.add((float) x); + } + }); + Assert.assertEquals((Float) 25.0f, floatAccum.value()); + } } From 0982572519655354b10987de4f68e29b8331bd2a Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sat, 5 Jan 2013 22:11:28 -0500 Subject: [PATCH 051/291] Add methods called just 'accumulator' for int/double in Java API --- .../scala/spark/api/java/JavaSparkContext.scala | 13 +++++++++++++ core/src/test/scala/spark/JavaAPISuite.java | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/spark/api/java/JavaSparkContext.scala index bf9ad7a200..88ab2846be 100644 --- a/core/src/main/scala/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/spark/api/java/JavaSparkContext.scala @@ -277,6 +277,19 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork def 
doubleAccumulator(initialValue: Double): Accumulator[java.lang.Double] = sc.accumulator(initialValue)(DoubleAccumulatorParam).asInstanceOf[Accumulator[java.lang.Double]] + /** + * Create an [[spark.Accumulator]] integer variable, which tasks can "add" values + * to using the `add` method. Only the master can access the accumulator's `value`. + */ + def accumulator(initialValue: Int): Accumulator[java.lang.Integer] = intAccumulator(initialValue) + + /** + * Create an [[spark.Accumulator]] double variable, which tasks can "add" values + * to using the `add` method. Only the master can access the accumulator's `value`. + */ + def accumulator(initialValue: Double): Accumulator[java.lang.Double] = + doubleAccumulator(initialValue) + /** * Create an [[spark.Accumulator]] variable of a given type, which tasks can "add" values * to using the `add` method. Only the master can access the accumulator's `value`. diff --git a/core/src/test/scala/spark/JavaAPISuite.java b/core/src/test/scala/spark/JavaAPISuite.java index b99e790093..912f8de05d 100644 --- a/core/src/test/scala/spark/JavaAPISuite.java +++ b/core/src/test/scala/spark/JavaAPISuite.java @@ -586,7 +586,7 @@ public class JavaAPISuite implements Serializable { public void accumulators() { JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); - final Accumulator intAccum = sc.intAccumulator(10); + final Accumulator intAccum = sc.accumulator(10); rdd.foreach(new VoidFunction() { public void call(Integer x) { intAccum.add(x); @@ -594,7 +594,7 @@ public class JavaAPISuite implements Serializable { }); Assert.assertEquals((Integer) 25, intAccum.value()); - final Accumulator doubleAccum = sc.doubleAccumulator(10.0); + final Accumulator doubleAccum = sc.accumulator(10.0); rdd.foreach(new VoidFunction() { public void call(Integer x) { doubleAccum.add((double) x); From 8fd3a70c188182105f81f5143ec65e74663582d5 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sat, 5 Jan 2013 22:46:45 -0500 Subject: [PATCH 052/291] Add PairRDD.keys() and values() to Java API --- core/src/main/scala/spark/api/java/JavaPairRDD.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/core/src/main/scala/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/spark/api/java/JavaPairRDD.scala index 5c2be534ff..8ce32e0e2f 100644 --- a/core/src/main/scala/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/spark/api/java/JavaPairRDD.scala @@ -471,6 +471,16 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif implicit def toOrdered(x: K): Ordered[K] = new KeyOrdering(x) fromRDD(new OrderedRDDFunctions(rdd).sortByKey(ascending)) } + + /** + * Return an RDD with the keys of each tuple. + */ + def keys(): JavaRDD[K] = JavaRDD.fromRDD[K](rdd.map(_._1)) + + /** + * Return an RDD with the values of each tuple. + */ + def values(): JavaRDD[V] = JavaRDD.fromRDD[V](rdd.map(_._2)) } object JavaPairRDD { From 8dc06069fe2330c3ee0fcaaeb0ae6e627a5887c3 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Sun, 6 Jan 2013 15:21:45 -0600 Subject: [PATCH 053/291] Rename RDD.tupleBy to keyBy. 
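Stepping back to the accumulator changes in the commits above: the Java test builds its custom accumulator around three operations, and their division of labor is easier to see spelled out. An illustrative Python rendering of the same contract (PySpark itself still lists accumulators as unsupported at this point, so this is only a sketch of the semantics, not an API):

    class FloatAccumulatorParam(object):
        """Same contract as the floatAccumulatorParam defined in the Java test above."""
        def zero(self, initial_value):
            return 0.0                    # identity value each task starts from
        def addAccumulator(self, acc, term):
            return acc + term             # fold one task-side term into the running value
        def addInPlace(self, r1, r2):
            return r1 + r2                # merge two partial results back on the driver
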
--- core/src/main/scala/spark/RDD.scala | 4 ++-- core/src/test/scala/spark/RDDSuite.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 7aa4b0a173..5ce524c0e7 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -511,9 +511,9 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial } /** - * Tuples the elements of this RDD by applying `f`. + * Creates tuples of the elements in this RDD by applying `f`. */ - def tupleBy[K](f: T => K): RDD[(K, T)] = { + def keyBy[K](f: T => K): RDD[(K, T)] = { map(x => (f(x), x)) } diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 7832884224..77bff8aba1 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -35,7 +35,7 @@ class RDDSuite extends FunSuite with BeforeAndAfter { assert(nums.flatMap(x => 1 to x).collect().toList === List(1, 1, 2, 1, 2, 3, 1, 2, 3, 4)) assert(nums.union(nums).collect().toList === List(1, 2, 3, 4, 1, 2, 3, 4)) assert(nums.glom().map(_.toList).collect().toList === List(List(1, 2), List(3, 4))) - assert(nums.tupleBy(_.toString).collect().toList === List(("1", 1), ("2", 2), ("3", 3), ("4", 4))) + assert(nums.keyBy(_.toString).collect().toList === List(("1", 1), ("2", 2), ("3", 3), ("4", 4))) val partitionSums = nums.mapPartitions(iter => Iterator(iter.reduceLeft(_ + _))) assert(partitionSums.collect().toList === List(3, 7)) From 9c32f300fb4151a2b563bf3d2e46469722e016e1 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 7 Jan 2013 16:50:23 -0500 Subject: [PATCH 054/291] Add Accumulable.setValue for easier use in Java --- core/src/main/scala/spark/Accumulators.scala | 20 +++++++++++++++----- core/src/test/scala/spark/JavaAPISuite.java | 4 ++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/spark/Accumulators.scala b/core/src/main/scala/spark/Accumulators.scala index 6280f25391..b644aba5f8 100644 --- a/core/src/main/scala/spark/Accumulators.scala +++ b/core/src/main/scala/spark/Accumulators.scala @@ -63,9 +63,12 @@ class Accumulable[R, T] ( /** * Access the accumulator's current value; only allowed on master. */ - def value = { - if (!deserialized) value_ - else throw new UnsupportedOperationException("Can't read accumulator value in task") + def value: R = { + if (!deserialized) { + value_ + } else { + throw new UnsupportedOperationException("Can't read accumulator value in task") + } } /** @@ -82,10 +85,17 @@ class Accumulable[R, T] ( /** * Set the accumulator's value; only allowed on master. 
*/ - def value_= (r: R) { - if (!deserialized) value_ = r + def value_= (newValue: R) { + if (!deserialized) value_ = newValue else throw new UnsupportedOperationException("Can't assign accumulator value in task") } + + /** + * Set the accumulator's value; only allowed on master + */ + def setValue(newValue: R) { + this.value = newValue + } // Called by Java when deserializing an object private def readObject(in: ObjectInputStream) { diff --git a/core/src/test/scala/spark/JavaAPISuite.java b/core/src/test/scala/spark/JavaAPISuite.java index 912f8de05d..0817d1146c 100644 --- a/core/src/test/scala/spark/JavaAPISuite.java +++ b/core/src/test/scala/spark/JavaAPISuite.java @@ -624,5 +624,9 @@ public class JavaAPISuite implements Serializable { } }); Assert.assertEquals((Float) 25.0f, floatAccum.value()); + + // Test the setValue method + floatAccum.setValue(5.0f); + Assert.assertEquals((Float) 5.0f, floatAccum.value()); } } From f8d579a0c05b7d29b59e541b483ded471d14ec17 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Thu, 27 Dec 2012 13:30:07 -0800 Subject: [PATCH 055/291] Remove dependencies on sun jvm classes. Instead use reflection to infer HotSpot options and total physical memory size --- core/src/main/scala/spark/SizeEstimator.scala | 13 ++++++++--- .../spark/deploy/worker/WorkerArguments.scala | 22 ++++++++++++++++--- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/spark/SizeEstimator.scala b/core/src/main/scala/spark/SizeEstimator.scala index 7c3e8640e9..d4e1157250 100644 --- a/core/src/main/scala/spark/SizeEstimator.scala +++ b/core/src/main/scala/spark/SizeEstimator.scala @@ -9,7 +9,6 @@ import java.util.Random import javax.management.MBeanServer import java.lang.management.ManagementFactory -import com.sun.management.HotSpotDiagnosticMXBean import scala.collection.mutable.ArrayBuffer @@ -76,12 +75,20 @@ private[spark] object SizeEstimator extends Logging { if (System.getProperty("spark.test.useCompressedOops") != null) { return System.getProperty("spark.test.useCompressedOops").toBoolean } + try { val hotSpotMBeanName = "com.sun.management:type=HotSpotDiagnostic" val server = ManagementFactory.getPlatformMBeanServer() + + // NOTE: This should throw an exception in non-Sun JVMs + val hotSpotMBeanClass = Class.forName("com.sun.management.HotSpotDiagnosticMXBean") + val getVMMethod = hotSpotMBeanClass.getDeclaredMethod("getVMOption", + Class.forName("java.lang.String")) + val bean = ManagementFactory.newPlatformMXBeanProxy(server, - hotSpotMBeanName, classOf[HotSpotDiagnosticMXBean]) - return bean.getVMOption("UseCompressedOops").getValue.toBoolean + hotSpotMBeanName, hotSpotMBeanClass) + // TODO: We could use reflection on the VMOption returned ? 
+ return getVMMethod.invoke(bean, "UseCompressedOops").toString.contains("true") } catch { case e: Exception => { // Guess whether they've enabled UseCompressedOops based on whether maxMemory < 32 GB diff --git a/core/src/main/scala/spark/deploy/worker/WorkerArguments.scala b/core/src/main/scala/spark/deploy/worker/WorkerArguments.scala index 340920025b..37524a7c82 100644 --- a/core/src/main/scala/spark/deploy/worker/WorkerArguments.scala +++ b/core/src/main/scala/spark/deploy/worker/WorkerArguments.scala @@ -104,9 +104,25 @@ private[spark] class WorkerArguments(args: Array[String]) { } def inferDefaultMemory(): Int = { - val bean = ManagementFactory.getOperatingSystemMXBean - .asInstanceOf[com.sun.management.OperatingSystemMXBean] - val totalMb = (bean.getTotalPhysicalMemorySize / 1024 / 1024).toInt + val ibmVendor = System.getProperty("java.vendor").contains("IBM") + var totalMb = 0 + try { + val bean = ManagementFactory.getOperatingSystemMXBean() + if (ibmVendor) { + val beanClass = Class.forName("com.ibm.lang.management.OperatingSystemMXBean") + val method = beanClass.getDeclaredMethod("getTotalPhysicalMemory") + totalMb = (method.invoke(bean).asInstanceOf[Long] / 1024 / 1024).toInt + } else { + val beanClass = Class.forName("com.sun.management.OperatingSystemMXBean") + val method = beanClass.getDeclaredMethod("getTotalPhysicalMemorySize") + totalMb = (method.invoke(bean).asInstanceOf[Long] / 1024 / 1024).toInt + } + } catch { + case e: Exception => { + totalMb = 2*1024 + System.out.println("Failed to get total physical memory. Using " + totalMb + " MB") + } + } // Leave out 1 GB for the operating system, but don't return a negative memory size math.max(totalMb - 1024, 512) } From aed368a970bbaee4bdf297ba3f6f1b0fa131452c Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Sat, 29 Dec 2012 16:23:43 -0800 Subject: [PATCH 056/291] Update Hadoop dependency to 1.0.3 as 0.20 has Sun specific dependencies. Also fix SequenceFileRDDFunctions to pick the right type conversion across Hadoop versions --- core/src/main/scala/spark/SequenceFileRDDFunctions.scala | 8 +++++++- project/SparkBuild.scala | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/SequenceFileRDDFunctions.scala b/core/src/main/scala/spark/SequenceFileRDDFunctions.scala index a34aee69c1..6b4a11d6d3 100644 --- a/core/src/main/scala/spark/SequenceFileRDDFunctions.scala +++ b/core/src/main/scala/spark/SequenceFileRDDFunctions.scala @@ -42,7 +42,13 @@ class SequenceFileRDDFunctions[K <% Writable: ClassManifest, V <% Writable : Cla if (classOf[Writable].isAssignableFrom(classManifest[T].erasure)) { classManifest[T].erasure } else { - implicitly[T => Writable].getClass.getMethods()(0).getReturnType + // We get the type of the Writable class by looking at the apply method which converts + // from T to Writable. Since we have two apply methods we filter out the one which + // is of the form "java.lang.Object apply(java.lang.Object)" + implicitly[T => Writable].getClass.getDeclaredMethods().filter( + m => m.getReturnType().toString != "java.lang.Object" && + m.getName() == "apply")(0).getReturnType + } // TODO: use something like WritableConverter to avoid reflection } diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 842d0fa96b..7c7c33131a 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -10,7 +10,7 @@ import twirl.sbt.TwirlPlugin._ object SparkBuild extends Build { // Hadoop version to build against. 
For example, "0.20.2", "0.20.205.0", or // "1.0.3" for Apache releases, or "0.20.2-cdh3u5" for Cloudera Hadoop. - val HADOOP_VERSION = "0.20.205.0" + val HADOOP_VERSION = "1.0.3" val HADOOP_MAJOR_VERSION = "1" // For Hadoop 2 versions such as "2.0.0-mr1-cdh4.1.1", set the HADOOP_MAJOR_VERSION to "2" From 77d751731ccd06e161e3ef10540f8165d964282f Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Sat, 29 Dec 2012 18:28:00 -0800 Subject: [PATCH 057/291] Remove unused BoundedMemoryCache file and associated test case. --- .../main/scala/spark/BoundedMemoryCache.scala | 118 ------------------ .../scala/spark/BoundedMemoryCacheSuite.scala | 58 --------- 2 files changed, 176 deletions(-) delete mode 100644 core/src/main/scala/spark/BoundedMemoryCache.scala delete mode 100644 core/src/test/scala/spark/BoundedMemoryCacheSuite.scala diff --git a/core/src/main/scala/spark/BoundedMemoryCache.scala b/core/src/main/scala/spark/BoundedMemoryCache.scala deleted file mode 100644 index e8392a194f..0000000000 --- a/core/src/main/scala/spark/BoundedMemoryCache.scala +++ /dev/null @@ -1,118 +0,0 @@ -package spark - -import java.util.LinkedHashMap - -/** - * An implementation of Cache that estimates the sizes of its entries and attempts to limit its - * total memory usage to a fraction of the JVM heap. Objects' sizes are estimated using - * SizeEstimator, which has limitations; most notably, we will overestimate total memory used if - * some cache entries have pointers to a shared object. Nonetheless, this Cache should work well - * when most of the space is used by arrays of primitives or of simple classes. - */ -private[spark] class BoundedMemoryCache(maxBytes: Long) extends Cache with Logging { - logInfo("BoundedMemoryCache.maxBytes = " + maxBytes) - - def this() { - this(BoundedMemoryCache.getMaxBytes) - } - - private var currentBytes = 0L - private val map = new LinkedHashMap[(Any, Int), Entry](32, 0.75f, true) - - override def get(datasetId: Any, partition: Int): Any = { - synchronized { - val entry = map.get((datasetId, partition)) - if (entry != null) { - entry.value - } else { - null - } - } - } - - override def put(datasetId: Any, partition: Int, value: Any): CachePutResponse = { - val key = (datasetId, partition) - logInfo("Asked to add key " + key) - val size = estimateValueSize(key, value) - synchronized { - if (size > getCapacity) { - return CachePutFailure() - } else if (ensureFreeSpace(datasetId, size)) { - logInfo("Adding key " + key) - map.put(key, new Entry(value, size)) - currentBytes += size - logInfo("Number of entries is now " + map.size) - return CachePutSuccess(size) - } else { - logInfo("Didn't add key " + key + " because we would have evicted part of same dataset") - return CachePutFailure() - } - } - } - - override def getCapacity: Long = maxBytes - - /** - * Estimate sizeOf 'value' - */ - private def estimateValueSize(key: (Any, Int), value: Any) = { - val startTime = System.currentTimeMillis - val size = SizeEstimator.estimate(value.asInstanceOf[AnyRef]) - val timeTaken = System.currentTimeMillis - startTime - logInfo("Estimated size for key %s is %d".format(key, size)) - logInfo("Size estimation for key %s took %d ms".format(key, timeTaken)) - size - } - - /** - * Remove least recently used entries from the map until at least space bytes are free, in order - * to make space for a partition from the given dataset ID. If this cannot be done without - * evicting other data from the same dataset, returns false; otherwise, returns true. 
Assumes - * that a lock is held on the BoundedMemoryCache. - */ - private def ensureFreeSpace(datasetId: Any, space: Long): Boolean = { - logInfo("ensureFreeSpace(%s, %d) called with curBytes=%d, maxBytes=%d".format( - datasetId, space, currentBytes, maxBytes)) - val iter = map.entrySet.iterator // Will give entries in LRU order - while (maxBytes - currentBytes < space && iter.hasNext) { - val mapEntry = iter.next() - val (entryDatasetId, entryPartition) = mapEntry.getKey - if (entryDatasetId == datasetId) { - // Cannot make space without removing part of the same dataset, or a more recently used one - return false - } - reportEntryDropped(entryDatasetId, entryPartition, mapEntry.getValue) - currentBytes -= mapEntry.getValue.size - iter.remove() - } - return true - } - - protected def reportEntryDropped(datasetId: Any, partition: Int, entry: Entry) { - logInfo("Dropping key (%s, %d) of size %d to make space".format(datasetId, partition, entry.size)) - // TODO: remove BoundedMemoryCache - - val (keySpaceId, innerDatasetId) = datasetId.asInstanceOf[(Any, Any)] - innerDatasetId match { - case rddId: Int => - SparkEnv.get.cacheTracker.dropEntry(rddId, partition) - case broadcastUUID: java.util.UUID => - // TODO: Maybe something should be done if the broadcasted variable falls out of cache - case _ => - } - } -} - -// An entry in our map; stores a cached object and its size in bytes -private[spark] case class Entry(value: Any, size: Long) - -private[spark] object BoundedMemoryCache { - /** - * Get maximum cache capacity from system configuration - */ - def getMaxBytes: Long = { - val memoryFractionToUse = System.getProperty("spark.boundedMemoryCache.memoryFraction", "0.66").toDouble - (Runtime.getRuntime.maxMemory * memoryFractionToUse).toLong - } -} - diff --git a/core/src/test/scala/spark/BoundedMemoryCacheSuite.scala b/core/src/test/scala/spark/BoundedMemoryCacheSuite.scala deleted file mode 100644 index 37cafd1e8e..0000000000 --- a/core/src/test/scala/spark/BoundedMemoryCacheSuite.scala +++ /dev/null @@ -1,58 +0,0 @@ -package spark - -import org.scalatest.FunSuite -import org.scalatest.PrivateMethodTester -import org.scalatest.matchers.ShouldMatchers - -// TODO: Replace this with a test of MemoryStore -class BoundedMemoryCacheSuite extends FunSuite with PrivateMethodTester with ShouldMatchers { - test("constructor test") { - val cache = new BoundedMemoryCache(60) - expect(60)(cache.getCapacity) - } - - test("caching") { - // Set the arch to 64-bit and compressedOops to true to get a deterministic test-case - val oldArch = System.setProperty("os.arch", "amd64") - val oldOops = System.setProperty("spark.test.useCompressedOops", "true") - val initialize = PrivateMethod[Unit]('initialize) - SizeEstimator invokePrivate initialize() - - val cache = new BoundedMemoryCache(60) { - //TODO sorry about this, but there is not better way how to skip 'cacheTracker.dropEntry' - override protected def reportEntryDropped(datasetId: Any, partition: Int, entry: Entry) { - logInfo("Dropping key (%s, %d) of size %d to make space".format(datasetId, partition, entry.size)) - } - } - - // NOTE: The String class definition changed in JDK 7 to exclude the int fields count and length - // This means that the size of strings will be lesser by 8 bytes in JDK 7 compared to JDK 6. - // http://mail.openjdk.java.net/pipermail/core-libs-dev/2012-May/010257.html - // Work around to check for either. 
- - //should be OK - cache.put("1", 0, "Meh") should (equal (CachePutSuccess(56)) or equal (CachePutSuccess(48))) - - //we cannot add this to cache (there is not enough space in cache) & we cannot evict the only value from - //cache because it's from the same dataset - expect(CachePutFailure())(cache.put("1", 1, "Meh")) - - //should be OK, dataset '1' can be evicted from cache - cache.put("2", 0, "Meh") should (equal (CachePutSuccess(56)) or equal (CachePutSuccess(48))) - - //should fail, cache should obey it's capacity - expect(CachePutFailure())(cache.put("3", 0, "Very_long_and_useless_string")) - - if (oldArch != null) { - System.setProperty("os.arch", oldArch) - } else { - System.clearProperty("os.arch") - } - - if (oldOops != null) { - System.setProperty("spark.test.useCompressedOops", oldOops) - } else { - System.clearProperty("spark.test.useCompressedOops") - } - } -} From 55c66d365f76f3e5ecc6b850ba81c84b320f6772 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Mon, 7 Jan 2013 15:19:33 -0800 Subject: [PATCH 058/291] Use a dummy string class in Size Estimator tests to make it resistant to jdk versions --- .../test/scala/spark/SizeEstimatorSuite.scala | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/core/src/test/scala/spark/SizeEstimatorSuite.scala b/core/src/test/scala/spark/SizeEstimatorSuite.scala index 17f366212b..bf3b2e1eed 100644 --- a/core/src/test/scala/spark/SizeEstimatorSuite.scala +++ b/core/src/test/scala/spark/SizeEstimatorSuite.scala @@ -20,6 +20,15 @@ class DummyClass4(val d: DummyClass3) { val x: Int = 0 } +object DummyString { + def apply(str: String) : DummyString = new DummyString(str.toArray) +} +class DummyString(val arr: Array[Char]) { + override val hashCode: Int = 0 + // JDK-7 has an extra hash32 field http://hg.openjdk.java.net/jdk7u/jdk7u6/jdk/rev/11987e85555f + @transient val hash32: Int = 0 +} + class SizeEstimatorSuite extends FunSuite with BeforeAndAfterAll with PrivateMethodTester with ShouldMatchers { @@ -50,10 +59,10 @@ class SizeEstimatorSuite // http://mail.openjdk.java.net/pipermail/core-libs-dev/2012-May/010257.html // Work around to check for either. 
test("strings") { - SizeEstimator.estimate("") should (equal (48) or equal (40)) - SizeEstimator.estimate("a") should (equal (56) or equal (48)) - SizeEstimator.estimate("ab") should (equal (56) or equal (48)) - SizeEstimator.estimate("abcdefgh") should (equal(64) or equal(56)) + SizeEstimator.estimate(DummyString("")) should (equal (48) or equal (40)) + SizeEstimator.estimate(DummyString("a")) should (equal (56) or equal (48)) + SizeEstimator.estimate(DummyString("ab")) should (equal (56) or equal (48)) + SizeEstimator.estimate(DummyString("abcdefgh")) should (equal(64) or equal(56)) } test("primitive arrays") { @@ -105,10 +114,10 @@ class SizeEstimatorSuite val initialize = PrivateMethod[Unit]('initialize) SizeEstimator invokePrivate initialize() - expect(40)(SizeEstimator.estimate("")) - expect(48)(SizeEstimator.estimate("a")) - expect(48)(SizeEstimator.estimate("ab")) - expect(56)(SizeEstimator.estimate("abcdefgh")) + expect(40)(SizeEstimator.estimate(DummyString(""))) + expect(48)(SizeEstimator.estimate(DummyString("a"))) + expect(48)(SizeEstimator.estimate(DummyString("ab"))) + expect(56)(SizeEstimator.estimate(DummyString("abcdefgh"))) resetOrClear("os.arch", arch) } @@ -124,10 +133,10 @@ class SizeEstimatorSuite val initialize = PrivateMethod[Unit]('initialize) SizeEstimator invokePrivate initialize() - SizeEstimator.estimate("") should (equal (64) or equal (56)) - SizeEstimator.estimate("a") should (equal (72) or equal (64)) - SizeEstimator.estimate("ab") should (equal (72) or equal (64)) - SizeEstimator.estimate("abcdefgh") should (equal (80) or equal (72)) + SizeEstimator.estimate(DummyString("")) should (equal (64) or equal (56)) + SizeEstimator.estimate(DummyString("a")) should (equal (72) or equal (64)) + SizeEstimator.estimate(DummyString("ab")) should (equal (72) or equal (64)) + SizeEstimator.estimate(DummyString("abcdefgh")) should (equal (80) or equal (72)) resetOrClear("os.arch", arch) resetOrClear("spark.test.useCompressedOops", oops) From fb3d4d5e85cd4b094411bb08a32ab50cc62dc151 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Mon, 7 Jan 2013 16:46:06 -0800 Subject: [PATCH 059/291] Make default hadoop version 1.0.3 in pom.xml --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b33cee26b8..fe5b1d0ee4 100644 --- a/pom.xml +++ b/pom.xml @@ -489,7 +489,7 @@ org.apache.hadoop hadoop-core - 0.20.205.0 + 1.0.3 From b1336e2fe458b92dcf60dcd249c41c7bdcc8be6d Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Mon, 7 Jan 2013 17:00:32 -0800 Subject: [PATCH 060/291] Update expected size of strings to match our dummy string class --- .../test/scala/spark/SizeEstimatorSuite.scala | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/core/src/test/scala/spark/SizeEstimatorSuite.scala b/core/src/test/scala/spark/SizeEstimatorSuite.scala index bf3b2e1eed..e235ef2f67 100644 --- a/core/src/test/scala/spark/SizeEstimatorSuite.scala +++ b/core/src/test/scala/spark/SizeEstimatorSuite.scala @@ -3,7 +3,6 @@ package spark import org.scalatest.FunSuite import org.scalatest.BeforeAndAfterAll import org.scalatest.PrivateMethodTester -import org.scalatest.matchers.ShouldMatchers class DummyClass1 {} @@ -30,7 +29,7 @@ class DummyString(val arr: Array[Char]) { } class SizeEstimatorSuite - extends FunSuite with BeforeAndAfterAll with PrivateMethodTester with ShouldMatchers { + extends FunSuite with BeforeAndAfterAll with PrivateMethodTester { var oldArch: String = _ var oldOops: String = _ @@ -54,15 
+53,13 @@ class SizeEstimatorSuite expect(48)(SizeEstimator.estimate(new DummyClass4(new DummyClass3))) } - // NOTE: The String class definition changed in JDK 7 to exclude the int fields count and length. - // This means that the size of strings will be lesser by 8 bytes in JDK 7 compared to JDK 6. - // http://mail.openjdk.java.net/pipermail/core-libs-dev/2012-May/010257.html - // Work around to check for either. + // NOTE: The String class definition varies across JDK versions (1.6 vs. 1.7) and vendors + // (Sun vs IBM). Use a DummyString class to make tests deterministic. test("strings") { - SizeEstimator.estimate(DummyString("")) should (equal (48) or equal (40)) - SizeEstimator.estimate(DummyString("a")) should (equal (56) or equal (48)) - SizeEstimator.estimate(DummyString("ab")) should (equal (56) or equal (48)) - SizeEstimator.estimate(DummyString("abcdefgh")) should (equal(64) or equal(56)) + expect(40)(SizeEstimator.estimate(DummyString(""))) + expect(48)(SizeEstimator.estimate(DummyString("a"))) + expect(48)(SizeEstimator.estimate(DummyString("ab"))) + expect(56)(SizeEstimator.estimate(DummyString("abcdefgh"))) } test("primitive arrays") { @@ -122,10 +119,8 @@ class SizeEstimatorSuite resetOrClear("os.arch", arch) } - // NOTE: The String class definition changed in JDK 7 to exclude the int fields count and length. - // This means that the size of strings will be lesser by 8 bytes in JDK 7 compared to JDK 6. - // http://mail.openjdk.java.net/pipermail/core-libs-dev/2012-May/010257.html - // Work around to check for either. + // NOTE: The String class definition varies across JDK versions (1.6 vs. 1.7) and vendors + // (Sun vs IBM). Use a DummyString class to make tests deterministic. test("64-bit arch with no compressed oops") { val arch = System.setProperty("os.arch", "amd64") val oops = System.setProperty("spark.test.useCompressedOops", "false") @@ -133,10 +128,10 @@ class SizeEstimatorSuite val initialize = PrivateMethod[Unit]('initialize) SizeEstimator invokePrivate initialize() - SizeEstimator.estimate(DummyString("")) should (equal (64) or equal (56)) - SizeEstimator.estimate(DummyString("a")) should (equal (72) or equal (64)) - SizeEstimator.estimate(DummyString("ab")) should (equal (72) or equal (64)) - SizeEstimator.estimate(DummyString("abcdefgh")) should (equal (80) or equal (72)) + expect(56)(SizeEstimator.estimate(DummyString(""))) + expect(64)(SizeEstimator.estimate(DummyString("a"))) + expect(64)(SizeEstimator.estimate(DummyString("ab"))) + expect(72)(SizeEstimator.estimate(DummyString("abcdefgh"))) resetOrClear("os.arch", arch) resetOrClear("spark.test.useCompressedOops", oops) From 4bbe07e5ece81fa874d2412bcc165179313a7619 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Mon, 7 Jan 2013 17:46:22 -0800 Subject: [PATCH 061/291] Activate hadoop1 profile by default for maven builds --- bagel/pom.xml | 3 +++ core/pom.xml | 5 ++++- examples/pom.xml | 3 +++ pom.xml | 3 +++ repl-bin/pom.xml | 3 +++ repl/pom.xml | 3 +++ 6 files changed, 19 insertions(+), 1 deletion(-) diff --git a/bagel/pom.xml b/bagel/pom.xml index a8256a6e8b..4ca643bbb7 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -45,6 +45,9 @@ hadoop1 + + true + org.spark-project diff --git a/core/pom.xml b/core/pom.xml index ae52c20657..cd789a7db0 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -159,6 +159,9 @@ hadoop1 + + true + org.apache.hadoop @@ -267,4 +270,4 @@ - \ No newline at end of file + diff --git a/examples/pom.xml b/examples/pom.xml index 782c026d73..9e638c8284 100644 --- 
a/examples/pom.xml +++ b/examples/pom.xml @@ -45,6 +45,9 @@ hadoop1 + + true + org.spark-project diff --git a/pom.xml b/pom.xml index fe5b1d0ee4..0e2d93c170 100644 --- a/pom.xml +++ b/pom.xml @@ -481,6 +481,9 @@ hadoop1 + + true + 1 diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index 0667b71cc7..aa9895eda2 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -70,6 +70,9 @@ hadoop1 + + true + hadoop1 diff --git a/repl/pom.xml b/repl/pom.xml index 114e3e9932..ba7a051310 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -72,6 +72,9 @@ hadoop1 + + true + hadoop1 From c41042c816c2d6299aa7d93529b7c39db5d5c03a Mon Sep 17 00:00:00 2001 From: Mikhail Bautin Date: Wed, 26 Dec 2012 15:52:51 -0800 Subject: [PATCH 062/291] Log preferred hosts --- .../main/scala/spark/scheduler/cluster/TaskSetManager.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala index cf4aae03a7..dda7a6c64a 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala @@ -201,7 +201,9 @@ private[spark] class TaskSetManager( val taskId = sched.newTaskId() // Figure out whether this should count as a preferred launch val preferred = isPreferredLocation(task, host) - val prefStr = if (preferred) "preferred" else "non-preferred" + val prefStr = if (preferred) "preferred" else + "non-preferred, not one of " + + task.preferredLocations.mkString(", ") logInfo("Starting task %s:%d as TID %s on slave %s: %s (%s)".format( taskSet.id, index, taskId, slaveId, host, prefStr)) // Do various bookkeeping From 4725b0f6439337c7a0f5f6fc7034c6f6b9488ae9 Mon Sep 17 00:00:00 2001 From: Mikhail Bautin Date: Mon, 7 Jan 2013 20:07:08 -0800 Subject: [PATCH 063/291] Fixing if/else coding style for preferred hosts logging --- .../main/scala/spark/scheduler/cluster/TaskSetManager.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala index dda7a6c64a..a842afcdeb 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala @@ -201,9 +201,8 @@ private[spark] class TaskSetManager( val taskId = sched.newTaskId() // Figure out whether this should count as a preferred launch val preferred = isPreferredLocation(task, host) - val prefStr = if (preferred) "preferred" else - "non-preferred, not one of " + - task.preferredLocations.mkString(", ") + val prefStr = if (preferred) "preferred" + else "non-preferred, not one of " + task.preferredLocations.mkString(", ") logInfo("Starting task %s:%d as TID %s on slave %s: %s (%s)".format( taskSet.id, index, taskId, slaveId, host, prefStr)) // Do various bookkeeping From f7adb382ace7f54c5093bf90574b3f9dd0d35534 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Tue, 8 Jan 2013 03:19:43 -0800 Subject: [PATCH 064/291] Activate hadoop1 if property hadoop is missing. hadoop2 can be activated now by using -Dhadoop -Phadoop2. 
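The two TaskSetManager patches above (062 and 063) only change a log message, but the intent is easy to miss in the diff: when a task is launched somewhere other than one of its preferred hosts, the scheduler now also logs which hosts it would have preferred. The short Scala sketch below is not part of any patch; it simply reproduces how that message is built, using made-up stand-ins for task.preferredLocations and the chosen slave host.

    // Stand-in values; in TaskSetManager these come from the task and the offered slave.
    val preferredLocations = Seq("host1", "host2")
    val host = "host3"
    val preferred = preferredLocations.contains(host)
    val prefStr = if (preferred) {
      "preferred"
    } else {
      "non-preferred, not one of " + preferredLocations.mkString(", ")
    }
    println(prefStr)  // prints: non-preferred, not one of host1, host2
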
--- bagel/pom.xml | 4 +++- core/pom.xml | 4 +++- examples/pom.xml | 4 +++- pom.xml | 4 +++- repl-bin/pom.xml | 4 +++- repl/pom.xml | 4 +++- 6 files changed, 18 insertions(+), 6 deletions(-) diff --git a/bagel/pom.xml b/bagel/pom.xml index 4ca643bbb7..85b2077026 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -46,7 +46,9 @@ hadoop1 - true + + !hadoop + diff --git a/core/pom.xml b/core/pom.xml index cd789a7db0..005d8fe498 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -160,7 +160,9 @@ hadoop1 - true + + !hadoop + diff --git a/examples/pom.xml b/examples/pom.xml index 9e638c8284..3f738a3f8c 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -46,7 +46,9 @@ hadoop1 - true + + !hadoop + diff --git a/pom.xml b/pom.xml index 0e2d93c170..ea5b9c9d05 100644 --- a/pom.xml +++ b/pom.xml @@ -482,7 +482,9 @@ hadoop1 - true + + !hadoop + 1 diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index aa9895eda2..fecb01f3cd 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -71,7 +71,9 @@ hadoop1 - true + + !hadoop + hadoop1 diff --git a/repl/pom.xml b/repl/pom.xml index ba7a051310..04b2c35beb 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -73,7 +73,9 @@ hadoop1 - true + + !hadoop + hadoop1 From e4cb72da8a5428c6b9097e92ddbdf4ceee087b85 Mon Sep 17 00:00:00 2001 From: shane-huang Date: Tue, 8 Jan 2013 22:40:58 +0800 Subject: [PATCH 065/291] Fix an issue in ConnectionManager where sendingMessage may create too many unnecessary SendingConnections. --- .../main/scala/spark/network/Connection.scala | 7 +++++-- .../spark/network/ConnectionManager.scala | 17 +++++++++-------- .../spark/network/ConnectionManagerTest.scala | 18 +++++++++--------- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/core/src/main/scala/spark/network/Connection.scala b/core/src/main/scala/spark/network/Connection.scala index 80262ab7b4..95096fd0ba 100644 --- a/core/src/main/scala/spark/network/Connection.scala +++ b/core/src/main/scala/spark/network/Connection.scala @@ -135,8 +135,11 @@ extends Connection(SocketChannel.open, selector_) { val chunk = message.getChunkForSending(defaultChunkSize) if (chunk.isDefined) { messages += message // this is probably incorrect, it wont work as fifo - if (!message.started) logDebug("Starting to send [" + message + "]") - message.started = true + if (!message.started) { + logDebug("Starting to send [" + message + "]") + message.started = true + message.startTime = System.currentTimeMillis + } return chunk } else { /*logInfo("Finished sending [" + message + "] to [" + remoteConnectionManagerId + "]")*/ diff --git a/core/src/main/scala/spark/network/ConnectionManager.scala b/core/src/main/scala/spark/network/ConnectionManager.scala index 642fa4b525..e7bd2d3bbd 100644 --- a/core/src/main/scala/spark/network/ConnectionManager.scala +++ b/core/src/main/scala/spark/network/ConnectionManager.scala @@ -43,12 +43,12 @@ private[spark] class ConnectionManager(port: Int) extends Logging { } val selector = SelectorProvider.provider.openSelector() - val handleMessageExecutor = Executors.newFixedThreadPool(4) + val handleMessageExecutor = Executors.newFixedThreadPool(20) val serverChannel = ServerSocketChannel.open() val connectionsByKey = new HashMap[SelectionKey, Connection] with SynchronizedMap[SelectionKey, Connection] val connectionsById = new HashMap[ConnectionManagerId, SendingConnection] with SynchronizedMap[ConnectionManagerId, SendingConnection] val messageStatuses = new HashMap[Int, MessageStatus] - val connectionRequests = new SynchronizedQueue[SendingConnection] + val 
connectionRequests = new HashMap[ConnectionManagerId, SendingConnection] with SynchronizedMap[ConnectionManagerId, SendingConnection] val keyInterestChangeRequests = new SynchronizedQueue[(SelectionKey, Int)] val sendMessageRequests = new Queue[(Message, SendingConnection)] @@ -78,11 +78,12 @@ private[spark] class ConnectionManager(port: Int) extends Logging { def run() { try { - while(!selectorThread.isInterrupted) { - while(!connectionRequests.isEmpty) { - val sendingConnection = connectionRequests.dequeue + while(!selectorThread.isInterrupted) { + for( (connectionManagerId, sendingConnection) <- connectionRequests) { + //val sendingConnection = connectionRequests.dequeue sendingConnection.connect() addConnection(sendingConnection) + connectionRequests -= connectionManagerId } sendMessageRequests.synchronized { while(!sendMessageRequests.isEmpty) { @@ -300,8 +301,7 @@ private[spark] class ConnectionManager(port: Int) extends Logging { private def sendMessage(connectionManagerId: ConnectionManagerId, message: Message) { def startNewConnection(): SendingConnection = { val inetSocketAddress = new InetSocketAddress(connectionManagerId.host, connectionManagerId.port) - val newConnection = new SendingConnection(inetSocketAddress, selector) - connectionRequests += newConnection + val newConnection = connectionRequests.getOrElseUpdate(connectionManagerId, new SendingConnection(inetSocketAddress, selector)) newConnection } val lookupKey = ConnectionManagerId.fromSocketAddress(connectionManagerId.toSocketAddress) @@ -465,7 +465,7 @@ private[spark] object ConnectionManager { val bufferMessage = Message.createBufferMessage(buffer.duplicate) manager.sendMessageReliably(manager.id, bufferMessage) }).foreach(f => { - val g = Await.result(f, 1 second) + val g = Await.result(f, 10 second) if (!g.isDefined) println("Failed") }) val finishTime = System.currentTimeMillis @@ -473,6 +473,7 @@ private[spark] object ConnectionManager { val mb = size * count / 1024.0 / 1024.0 val ms = finishTime - startTime val tput = mb * 1000.0 / ms + println("Sent " + mb + " MB in " + ms + " ms (" + tput + " MB/s)") println("--------------------------") println() } diff --git a/core/src/main/scala/spark/network/ConnectionManagerTest.scala b/core/src/main/scala/spark/network/ConnectionManagerTest.scala index 47ceaf3c07..0e79c518e0 100644 --- a/core/src/main/scala/spark/network/ConnectionManagerTest.scala +++ b/core/src/main/scala/spark/network/ConnectionManagerTest.scala @@ -13,8 +13,8 @@ import akka.util.duration._ private[spark] object ConnectionManagerTest extends Logging{ def main(args: Array[String]) { - if (args.length < 2) { - println("Usage: ConnectionManagerTest ") + if (args.length < 5) { + println("Usage: ConnectionManagerTest ") System.exit(1) } @@ -29,16 +29,16 @@ private[spark] object ConnectionManagerTest extends Logging{ /*println("Slaves")*/ /*slaves.foreach(println)*/ - - val slaveConnManagerIds = sc.parallelize(0 until slaves.length, slaves.length).map( + val tasknum = args(2).toInt + val slaveConnManagerIds = sc.parallelize(0 until tasknum, tasknum).map( i => SparkEnv.get.connectionManager.id).collect() println("\nSlave ConnectionManagerIds") slaveConnManagerIds.foreach(println) println - val count = 10 + val count = args(4).toInt (0 until count).foreach(i => { - val resultStrs = sc.parallelize(0 until slaves.length, slaves.length).map(i => { + val resultStrs = sc.parallelize(0 until tasknum, tasknum).map(i => { val connManager = SparkEnv.get.connectionManager val thisConnManagerId = connManager.id 
connManager.onReceiveMessage((msg: Message, id: ConnectionManagerId) => { @@ -46,7 +46,7 @@ private[spark] object ConnectionManagerTest extends Logging{ None }) - val size = 100 * 1024 * 1024 + val size = (args(3).toInt) * 1024 * 1024 val buffer = ByteBuffer.allocate(size).put(Array.tabulate[Byte](size)(x => x.toByte)) buffer.flip @@ -56,13 +56,13 @@ private[spark] object ConnectionManagerTest extends Logging{ logInfo("Sending [" + bufferMessage + "] to [" + slaveConnManagerId + "]") connManager.sendMessageReliably(slaveConnManagerId, bufferMessage) }) - val results = futures.map(f => Await.result(f, 1.second)) + val results = futures.map(f => Await.result(f, 999.second)) val finishTime = System.currentTimeMillis Thread.sleep(5000) val mb = size * results.size / 1024.0 / 1024.0 val ms = finishTime - startTime - val resultStr = "Sent " + mb + " MB in " + ms + " ms at " + (mb / ms * 1000.0) + " MB/s" + val resultStr = thisConnManagerId + " Sent " + mb + " MB in " + ms + " ms at " + (mb / ms * 1000.0) + " MB/s" logInfo(resultStr) resultStr }).collect() From 8ac0f35be42765fcd6f02dcf0f070f2ef2377a85 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 8 Jan 2013 09:57:45 -0600 Subject: [PATCH 066/291] Add JavaRDDLike.keyBy. --- core/src/main/scala/spark/api/java/JavaRDDLike.scala | 8 ++++++++ core/src/test/scala/spark/JavaAPISuite.java | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/core/src/main/scala/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/spark/api/java/JavaRDDLike.scala index 81d3a94466..d15f6dd02f 100644 --- a/core/src/main/scala/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/spark/api/java/JavaRDDLike.scala @@ -298,4 +298,12 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Save this RDD as a SequenceFile of serialized objects. */ def saveAsObjectFile(path: String) = rdd.saveAsObjectFile(path) + + /** + * Creates tuples of the elements in this RDD by applying `f`. + */ + def keyBy[K](f: JFunction[T, K]): JavaPairRDD[K, T] = { + implicit val kcm: ClassManifest[K] = implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[K]] + JavaPairRDD.fromRDD(rdd.keyBy(f)) + } } diff --git a/core/src/test/scala/spark/JavaAPISuite.java b/core/src/test/scala/spark/JavaAPISuite.java index 0817d1146c..c61913fc82 100644 --- a/core/src/test/scala/spark/JavaAPISuite.java +++ b/core/src/test/scala/spark/JavaAPISuite.java @@ -629,4 +629,16 @@ public class JavaAPISuite implements Serializable { floatAccum.setValue(5.0f); Assert.assertEquals((Float) 5.0f, floatAccum.value()); } + + @Test + public void keyBy() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2)); + List> s = rdd.keyBy(new Function() { + public String call(Integer t) throws Exception { + return t.toString(); + } + }).collect(); + Assert.assertEquals(new Tuple2("1", 1), s.get(0)); + Assert.assertEquals(new Tuple2("2", 2), s.get(1)); + } } From c3f1675f9c4a1be9eebf9512795abc968ac29ba2 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 8 Jan 2013 14:44:33 -0600 Subject: [PATCH 067/291] Retrieve jars to a flat directory so * can be used for the classpath. 
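Patch 065 above replaces the queue of pending SendingConnections with a map keyed by ConnectionManagerId, so repeated sends to the same destination reuse a single pending connection instead of allocating a new one each time. The following self-contained Scala sketch shows the getOrElseUpdate idiom that provides the de-duplication; Id and Conn are simplified stand-ins for the real ConnectionManagerId and SendingConnection, not Spark classes.

    import scala.collection.mutable

    // Simplified stand-ins for ConnectionManagerId and SendingConnection.
    case class Id(host: String, port: Int)
    class Conn(val id: Id)

    val connectionRequests = mutable.HashMap.empty[Id, Conn]

    // Mirrors startNewConnection() after patch 065: only the first request for a
    // destination allocates a connection; later requests return the pending one.
    def startNewConnection(id: Id): Conn =
      connectionRequests.getOrElseUpdate(id, new Conn(id))

    val a = startNewConnection(Id("host1", 7077))
    val b = startNewConnection(Id("host1", 7077))
    assert(a eq b)  // the second request did not create a duplicate connection
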
--- project/SparkBuild.scala | 1 + run | 12 +++--------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 7c7c33131a..518c4130f0 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -38,6 +38,7 @@ object SparkBuild extends Build { scalacOptions := Seq(/*"-deprecation",*/ "-unchecked", "-optimize"), // -deprecation is too noisy due to usage of old Hadoop API, enable it once that's no longer an issue unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath }, retrieveManaged := true, + retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]", transitiveClassifiers in Scope.GlobalScope := Seq("sources"), testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))), diff --git a/run b/run index 1528f83534..6cfe9631af 100755 --- a/run +++ b/run @@ -75,16 +75,10 @@ CLASSPATH+=":$CORE_DIR/src/main/resources" CLASSPATH+=":$REPL_DIR/target/scala-$SCALA_VERSION/classes" CLASSPATH+=":$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes" if [ -e "$FWDIR/lib_managed" ]; then - for jar in `find "$FWDIR/lib_managed/jars" -name '*jar'`; do - CLASSPATH+=":$jar" - done - for jar in `find "$FWDIR/lib_managed/bundles" -name '*jar'`; do - CLASSPATH+=":$jar" - done + CLASSPATH+=":$FWDIR/lib_managed/jars/*" + CLASSPATH+=":$FWDIR/lib_managed/bundles/*" fi -for jar in `find "$REPL_DIR/lib" -name '*jar'`; do - CLASSPATH+=":$jar" -done +CLASSPATH+=":$REPL_DIR/lib/*" for jar in `find "$REPL_DIR/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do CLASSPATH+=":$jar" done From b57dd0f16024a82dfc223e69528b9908b931f068 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Tue, 8 Jan 2013 16:04:41 -0800 Subject: [PATCH 068/291] Add mapPartitionsWithSplit() to PySpark. 
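The diff that follows threads the split index through the Scala-side PythonRDD and exposes mapPartitionsWithSplit on Python RDDs. Conceptually the operation applies a function to each partition together with that partition's index. The Scala sketch below is only a plain-collections model of that behaviour, not Spark API calls; it mirrors the doctest added in rdd.py, where yielding each split index over four partitions sums to 6.

    // `parts` stands in for an RDD's four splits.
    val parts = Vector(Seq(1), Seq(2), Seq(3), Seq(4))

    def mapPartitionsWithSplit[T, U](partitions: Vector[Seq[T]])
                                    (f: (Int, Iterator[T]) => Iterator[U]): Vector[Seq[U]] =
      partitions.zipWithIndex.map { case (part, idx) => f(idx, part.iterator).toList }

    // Yield only the split index from each partition, as the new doctest does.
    val indices = mapPartitionsWithSplit(parts)((idx, _) => Iterator(idx)).flatten
    assert(indices.sum == 6)  // 0 + 1 + 2 + 3, matching the doctest's expected result
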
--- .../scala/spark/api/python/PythonRDD.scala | 5 +++ docs/python-programming-guide.md | 1 - python/pyspark/rdd.py | 33 ++++++++++++------- python/pyspark/worker.py | 4 ++- 4 files changed, 30 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 79d824d494..f431ef28d3 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -65,6 +65,9 @@ private[spark] class PythonRDD[T: ClassManifest]( SparkEnv.set(env) val out = new PrintWriter(proc.getOutputStream) val dOut = new DataOutputStream(proc.getOutputStream) + // Split index + dOut.writeInt(split.index) + // Broadcast variables dOut.writeInt(broadcastVars.length) for (broadcast <- broadcastVars) { dOut.writeLong(broadcast.id) @@ -72,10 +75,12 @@ private[spark] class PythonRDD[T: ClassManifest]( dOut.write(broadcast.value) dOut.flush() } + // Serialized user code for (elem <- command) { out.println(elem) } out.flush() + // Data values for (elem <- parent.iterator(split, context)) { PythonRDD.writeAsPickle(elem, dOut) } diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md index d963551296..78ef310a00 100644 --- a/docs/python-programming-guide.md +++ b/docs/python-programming-guide.md @@ -19,7 +19,6 @@ There are a few key differences between the Python and Scala APIs: - Accumulators - Special functions on RDDs of doubles, such as `mean` and `stdev` - `lookup` - - `mapPartitionsWithSplit` - `persist` at storage levels other than `MEMORY_ONLY` - `sample` - `sort` diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 4ba417b2a2..1d36da42b0 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -55,7 +55,7 @@ class RDD(object): """ Return a new RDD containing the distinct elements in this RDD. """ - def func(iterator): return imap(f, iterator) + def func(split, iterator): return imap(f, iterator) return PipelinedRDD(self, func, preservesPartitioning) def flatMap(self, f, preservesPartitioning=False): @@ -69,8 +69,8 @@ class RDD(object): >>> sorted(rdd.flatMap(lambda x: [(x, x), (x, x)]).collect()) [(2, 2), (2, 2), (3, 3), (3, 3), (4, 4), (4, 4)] """ - def func(iterator): return chain.from_iterable(imap(f, iterator)) - return self.mapPartitions(func, preservesPartitioning) + def func(s, iterator): return chain.from_iterable(imap(f, iterator)) + return self.mapPartitionsWithSplit(func, preservesPartitioning) def mapPartitions(self, f, preservesPartitioning=False): """ @@ -81,9 +81,20 @@ class RDD(object): >>> rdd.mapPartitions(f).collect() [3, 7] """ - return PipelinedRDD(self, f, preservesPartitioning) + def func(s, iterator): return f(iterator) + return self.mapPartitionsWithSplit(func) - # TODO: mapPartitionsWithSplit + def mapPartitionsWithSplit(self, f, preservesPartitioning=False): + """ + Return a new RDD by applying a function to each partition of this RDD, + while tracking the index of the original partition. 
+ + >>> rdd = sc.parallelize([1, 2, 3, 4], 4) + >>> def f(splitIndex, iterator): yield splitIndex + >>> rdd.mapPartitionsWithSplit(f).sum() + 6 + """ + return PipelinedRDD(self, f, preservesPartitioning) def filter(self, f): """ @@ -362,7 +373,7 @@ class RDD(object): >>> ''.join(input(glob(tempFile.name + "/part-0000*"))) '0\\n1\\n2\\n3\\n4\\n5\\n6\\n7\\n8\\n9\\n' """ - def func(iterator): + def func(split, iterator): return (str(x).encode("utf-8") for x in iterator) keyed = PipelinedRDD(self, func) keyed._bypass_serializer = True @@ -500,7 +511,7 @@ class RDD(object): # Transferring O(n) objects to Java is too expensive. Instead, we'll # form the hash buckets in Python, transferring O(numSplits) objects # to Java. Each object is a (splitNumber, [objects]) pair. - def add_shuffle_key(iterator): + def add_shuffle_key(split, iterator): buckets = defaultdict(list) for (k, v) in iterator: buckets[hashFunc(k) % numSplits].append((k, v)) @@ -653,8 +664,8 @@ class PipelinedRDD(RDD): def __init__(self, prev, func, preservesPartitioning=False): if isinstance(prev, PipelinedRDD) and not prev.is_cached: prev_func = prev.func - def pipeline_func(iterator): - return func(prev_func(iterator)) + def pipeline_func(split, iterator): + return func(split, prev_func(split, iterator)) self.func = pipeline_func self.preservesPartitioning = \ prev.preservesPartitioning and preservesPartitioning @@ -677,8 +688,8 @@ class PipelinedRDD(RDD): if not self._bypass_serializer and self.ctx.batchSize != 1: oldfunc = self.func batchSize = self.ctx.batchSize - def batched_func(iterator): - return batched(oldfunc(iterator), batchSize) + def batched_func(split, iterator): + return batched(oldfunc(split, iterator), batchSize) func = batched_func cmds = [func, self._bypass_serializer] pipe_command = ' '.join(b64enc(cloudpickle.dumps(f)) for f in cmds) diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 9f6b507dbd..3d792bbaa2 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -21,6 +21,7 @@ def load_obj(): def main(): + split_index = read_int(sys.stdin) num_broadcast_variables = read_int(sys.stdin) for _ in range(num_broadcast_variables): bid = read_long(sys.stdin) @@ -32,7 +33,8 @@ def main(): dumps = lambda x: x else: dumps = dump_pickle - for obj in func(read_from_pickle_file(sys.stdin)): + iterator = read_from_pickle_file(sys.stdin) + for obj in func(split_index, iterator): write_with_length(dumps(obj), old_stdout) From 9cc764f52323baa3a218ce9e301d3cc98f1e8b20 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Tue, 8 Jan 2013 22:29:57 -0800 Subject: [PATCH 069/291] Code style --- .../scala/spark/scheduler/cluster/TaskSetManager.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala index a842afcdeb..a089b71644 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala @@ -201,8 +201,11 @@ private[spark] class TaskSetManager( val taskId = sched.newTaskId() // Figure out whether this should count as a preferred launch val preferred = isPreferredLocation(task, host) - val prefStr = if (preferred) "preferred" - else "non-preferred, not one of " + task.preferredLocations.mkString(", ") + val prefStr = if (preferred) { + "preferred" + } else { + "non-preferred, not one of " + task.preferredLocations.mkString(", ") + } logInfo("Starting task %s:%d as 
TID %s on slave %s: %s (%s)".format( taskSet.id, index, taskId, slaveId, host, prefStr)) // Do various bookkeeping From 6e8c8f61c478ec5829677a38a624f17ac9609f74 Mon Sep 17 00:00:00 2001 From: Tyson Date: Wed, 9 Jan 2013 10:35:23 -0500 Subject: [PATCH 070/291] Added the spray implicit marshaller library Added the io.spray JSON library --- project/SparkBuild.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 2f67bb9921..f2b79d9ed8 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -133,6 +133,8 @@ object SparkBuild extends Build { "colt" % "colt" % "1.2.0", "cc.spray" % "spray-can" % "1.0-M2.1", "cc.spray" % "spray-server" % "1.0-M2.1", + "cc.spray" %% "spray-json" % "1.1.1", + "io.spray" %% "spray-json" % "1.2.3", "org.apache.mesos" % "mesos" % "0.9.0-incubating" ) ++ (if (HADOOP_MAJOR_VERSION == "2") Some("org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION) else None).toSeq, unmanagedSourceDirectories in Compile <+= baseDirectory{ _ / ("src/hadoop" + HADOOP_MAJOR_VERSION + "/scala") } From 269fe018c73a0d4e12a3c881dbd3bd807e504891 Mon Sep 17 00:00:00 2001 From: Tyson Date: Wed, 9 Jan 2013 10:35:59 -0500 Subject: [PATCH 071/291] JSON object definitions --- .../scala/spark/deploy/JsonProtocol.scala | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 core/src/main/scala/spark/deploy/JsonProtocol.scala diff --git a/core/src/main/scala/spark/deploy/JsonProtocol.scala b/core/src/main/scala/spark/deploy/JsonProtocol.scala new file mode 100644 index 0000000000..dc7da85f9c --- /dev/null +++ b/core/src/main/scala/spark/deploy/JsonProtocol.scala @@ -0,0 +1,59 @@ +package spark.deploy + +import master.{JobInfo, WorkerInfo} +import spray.json._ + +/** + * spray-json helper class containing implicit conversion to json for marshalling responses + */ +private[spark] object JsonProtocol extends DefaultJsonProtocol { + import cc.spray.json._ + + implicit object WorkerInfoJsonFormat extends RootJsonWriter[WorkerInfo] { + def write(obj: WorkerInfo) = JsObject( + "id" -> JsString(obj.id), + "host" -> JsString(obj.host), + "webuiaddress" -> JsString(obj.webUiAddress), + "cores" -> JsNumber(obj.cores), + "coresused" -> JsNumber(obj.coresUsed), + "memory" -> JsNumber(obj.memory), + "memoryused" -> JsNumber(obj.memoryUsed) + ) + } + + implicit object JobInfoJsonFormat extends RootJsonWriter[JobInfo] { + def write(obj: JobInfo) = JsObject( + "starttime" -> JsNumber(obj.startTime), + "id" -> JsString(obj.id), + "name" -> JsString(obj.desc.name), + "cores" -> JsNumber(obj.desc.cores), + "user" -> JsString(obj.desc.user), + "memoryperslave" -> JsNumber(obj.desc.memoryPerSlave), + "submitdate" -> JsString(obj.submitDate.toString)) + } + + implicit object MasterStateJsonFormat extends RootJsonWriter[MasterState] { + def write(obj: MasterState) = JsObject( + "url" -> JsString("spark://" + obj.uri), + "workers" -> JsArray(obj.workers.toList.map(_.toJson)), + "cores" -> JsNumber(obj.workers.map(_.cores).sum), + "coresused" -> JsNumber(obj.workers.map(_.coresUsed).sum), + "memory" -> JsNumber(obj.workers.map(_.memory).sum), + "memoryused" -> JsNumber(obj.workers.map(_.memoryUsed).sum), + "activejobs" -> JsArray(obj.activeJobs.toList.map(_.toJson)), + "completedjobs" -> JsArray(obj.completedJobs.toList.map(_.toJson)) + ) + } + + implicit object WorkerStateJsonFormat extends RootJsonWriter[WorkerState] { + def write(obj: WorkerState) = JsObject( + "id" -> JsString(obj.workerId), + "masterurl" -> 
JsString(obj.masterUrl), + "masterwebuiurl" -> JsString(obj.masterWebUiUrl), + "cores" -> JsNumber(obj.cores), + "coresused" -> JsNumber(obj.coresUsed), + "memory" -> JsNumber(obj.memory), + "memoryused" -> JsNumber(obj.memoryUsed) + ) + } +} From 0da2ff102e1e8ac50059252a153a1b9b3e74b6b8 Mon Sep 17 00:00:00 2001 From: Tyson Date: Wed, 9 Jan 2013 10:36:56 -0500 Subject: [PATCH 072/291] Added url query parameter json and handler --- .../spark/deploy/master/MasterWebUI.scala | 19 +++++++++++++----- .../spark/deploy/worker/WorkerWebUI.scala | 20 ++++++++++++++----- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/spark/deploy/master/MasterWebUI.scala b/core/src/main/scala/spark/deploy/master/MasterWebUI.scala index 3cdd3721f5..dfec1d1dc5 100644 --- a/core/src/main/scala/spark/deploy/master/MasterWebUI.scala +++ b/core/src/main/scala/spark/deploy/master/MasterWebUI.scala @@ -9,6 +9,9 @@ import cc.spray.Directives import cc.spray.directives._ import cc.spray.typeconversion.TwirlSupport._ import spark.deploy._ +import cc.spray.http.MediaTypes +import JsonProtocol._ +import cc.spray.typeconversion.SprayJsonSupport._ private[spark] class MasterWebUI(val actorSystem: ActorSystem, master: ActorRef) extends Directives { @@ -19,13 +22,19 @@ class MasterWebUI(val actorSystem: ActorSystem, master: ActorRef) extends Direct val handler = { get { - path("") { - completeWith { + (path("") & parameters('json ?)) { + case Some(js) => val future = master ? RequestMasterState - future.map { - masterState => spark.deploy.master.html.index.render(masterState.asInstanceOf[MasterState]) + respondWithMediaType(MediaTypes.`application/json`) { ctx => + ctx.complete(future.mapTo[MasterState]) + } + case None => + completeWith { + val future = master ? RequestMasterState + future.map { + masterState => spark.deploy.master.html.index.render(masterState.asInstanceOf[MasterState]) + } } - } } ~ path("job") { parameter("jobId") { jobId => diff --git a/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala b/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala index d06f4884ee..a168f54ca0 100644 --- a/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala +++ b/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala @@ -7,7 +7,10 @@ import akka.util.Timeout import akka.util.duration._ import cc.spray.Directives import cc.spray.typeconversion.TwirlSupport._ -import spark.deploy.{WorkerState, RequestWorkerState} +import spark.deploy.{JsonProtocol, WorkerState, RequestWorkerState} +import cc.spray.http.MediaTypes +import JsonProtocol._ +import cc.spray.typeconversion.SprayJsonSupport._ private[spark] class WorkerWebUI(val actorSystem: ActorSystem, worker: ActorRef) extends Directives { @@ -18,13 +21,20 @@ class WorkerWebUI(val actorSystem: ActorSystem, worker: ActorRef) extends Direct val handler = { get { - path("") { - completeWith{ + (path("") & parameters('json ?)) { + case Some(js) => { val future = worker ? RequestWorkerState - future.map { workerState => - spark.deploy.worker.html.index(workerState.asInstanceOf[WorkerState]) + respondWithMediaType(MediaTypes.`application/json`) { ctx => + ctx.complete(future.mapTo[WorkerState]) } } + case None => + completeWith{ + val future = worker ? 
RequestWorkerState + future.map { workerState => + spark.deploy.worker.html.index(workerState.asInstanceOf[WorkerState]) + } + } } ~ path("log") { parameters("jobId", "executorId", "logType") { (jobId, executorId, logType) => From bf9d9946f97782c9212420123b4a042918d7df5e Mon Sep 17 00:00:00 2001 From: Tyson Date: Wed, 9 Jan 2013 11:29:22 -0500 Subject: [PATCH 073/291] Query parameter reformatted to be more extensible and routing more robust --- core/src/main/scala/spark/deploy/master/MasterWebUI.scala | 6 +++--- core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/spark/deploy/master/MasterWebUI.scala b/core/src/main/scala/spark/deploy/master/MasterWebUI.scala index dfec1d1dc5..a96b55d6f3 100644 --- a/core/src/main/scala/spark/deploy/master/MasterWebUI.scala +++ b/core/src/main/scala/spark/deploy/master/MasterWebUI.scala @@ -22,13 +22,13 @@ class MasterWebUI(val actorSystem: ActorSystem, master: ActorRef) extends Direct val handler = { get { - (path("") & parameters('json ?)) { - case Some(js) => + (path("") & parameters('format ?)) { + case Some(js) if js.equalsIgnoreCase("json") => val future = master ? RequestMasterState respondWithMediaType(MediaTypes.`application/json`) { ctx => ctx.complete(future.mapTo[MasterState]) } - case None => + case _ => completeWith { val future = master ? RequestMasterState future.map { diff --git a/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala b/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala index a168f54ca0..84b6c16bd6 100644 --- a/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala +++ b/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala @@ -21,14 +21,14 @@ class WorkerWebUI(val actorSystem: ActorSystem, worker: ActorRef) extends Direct val handler = { get { - (path("") & parameters('json ?)) { - case Some(js) => { + (path("") & parameters('format ?)) { + case Some(js) if js.equalsIgnoreCase("json") => { val future = worker ? RequestWorkerState respondWithMediaType(MediaTypes.`application/json`) { ctx => ctx.complete(future.mapTo[WorkerState]) } } - case None => + case _ => completeWith{ val future = worker ? RequestWorkerState future.map { workerState => From 549ee388a125ac7014ae3dadfb16c582e250c654 Mon Sep 17 00:00:00 2001 From: Tyson Date: Wed, 9 Jan 2013 15:12:23 -0500 Subject: [PATCH 074/291] Removed io.spray spray-json dependency as it is not needed. 
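Patches 070 through 074 above add JSON views of master and worker state: a cc.spray.json protocol object supplies RootJsonWriter instances, and the web UIs answer ?format=json by completing the request with the mapped future. As a reminder of the writer pattern those patches rely on, here is a minimal, self-contained sketch. WorkerSummary is a made-up stand-in for the real WorkerInfo, and only the cc.spray.json constructs that already appear in JsonProtocol.scala are used.

    import cc.spray.json._

    // Made-up stand-in for spark.deploy.master.WorkerInfo.
    case class WorkerSummary(id: String, host: String, cores: Int, coresUsed: Int)

    object WorkerSummaryJson extends DefaultJsonProtocol {
      // The writer builds the JsObject field by field, exactly as JsonProtocol.scala does.
      implicit object WorkerSummaryWriter extends RootJsonWriter[WorkerSummary] {
        def write(w: WorkerSummary) = JsObject(
          "id"        -> JsString(w.id),
          "host"      -> JsString(w.host),
          "cores"     -> JsNumber(w.cores),
          "coresused" -> JsNumber(w.coresUsed)
        )
      }
    }

    import WorkerSummaryJson._
    println(WorkerSummary("worker-1", "host1", 8, 2).toJson)
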
--- core/src/main/scala/spark/deploy/JsonProtocol.scala | 4 +--- project/SparkBuild.scala | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/core/src/main/scala/spark/deploy/JsonProtocol.scala b/core/src/main/scala/spark/deploy/JsonProtocol.scala index dc7da85f9c..f14f804b3a 100644 --- a/core/src/main/scala/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/spark/deploy/JsonProtocol.scala @@ -1,14 +1,12 @@ package spark.deploy import master.{JobInfo, WorkerInfo} -import spray.json._ +import cc.spray.json._ /** * spray-json helper class containing implicit conversion to json for marshalling responses */ private[spark] object JsonProtocol extends DefaultJsonProtocol { - import cc.spray.json._ - implicit object WorkerInfoJsonFormat extends RootJsonWriter[WorkerInfo] { def write(obj: WorkerInfo) = JsObject( "id" -> JsString(obj.id), diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f2b79d9ed8..c63efbdd2a 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -134,7 +134,6 @@ object SparkBuild extends Build { "cc.spray" % "spray-can" % "1.0-M2.1", "cc.spray" % "spray-server" % "1.0-M2.1", "cc.spray" %% "spray-json" % "1.1.1", - "io.spray" %% "spray-json" % "1.2.3", "org.apache.mesos" % "mesos" % "0.9.0-incubating" ) ++ (if (HADOOP_MAJOR_VERSION == "2") Some("org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION) else None).toSeq, unmanagedSourceDirectories in Compile <+= baseDirectory{ _ / ("src/hadoop" + HADOOP_MAJOR_VERSION + "/scala") } From e3861ae3953d7cab66160833688c8baf84e835ad Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Wed, 9 Jan 2013 17:03:25 -0600 Subject: [PATCH 075/291] Provide and expose a default Hadoop Configuration. Any "hadoop.*" system properties will be passed along into configuration. --- core/src/main/scala/spark/SparkContext.scala | 18 ++++++++++++++---- .../spark/api/java/JavaSparkContext.scala | 7 +++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index bbf8272eb3..36e0938854 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -187,6 +187,18 @@ class SparkContext( private var dagScheduler = new DAGScheduler(taskScheduler) + /** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. */ + val hadoopConfiguration = { + val conf = new Configuration() + // Copy any "hadoop.foo=bar" system properties into conf as "foo=bar" + for (key <- System.getProperties.keys.asInstanceOf[Set[String]] if key.startsWith("hadoop.")) { + conf.set(key.substring("hadoop.".length), System.getProperty(key)) + } + val bufferSize = System.getProperty("spark.buffer.size", "65536") + conf.set("io.file.buffer.size", bufferSize) + conf + } + // Methods for creating RDDs /** Distribute a local Scala collection to form an RDD. 
*/ @@ -231,10 +243,8 @@ class SparkContext( valueClass: Class[V], minSplits: Int = defaultMinSplits ) : RDD[(K, V)] = { - val conf = new JobConf() + val conf = new JobConf(hadoopConfiguration) FileInputFormat.setInputPaths(conf, path) - val bufferSize = System.getProperty("spark.buffer.size", "65536") - conf.set("io.file.buffer.size", bufferSize) new HadoopRDD(this, conf, inputFormatClass, keyClass, valueClass, minSplits) } @@ -276,7 +286,7 @@ class SparkContext( fm.erasure.asInstanceOf[Class[F]], km.erasure.asInstanceOf[Class[K]], vm.erasure.asInstanceOf[Class[V]], - new Configuration) + hadoopConfiguration) } /** diff --git a/core/src/main/scala/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/spark/api/java/JavaSparkContext.scala index 88ab2846be..12e2a0bdac 100644 --- a/core/src/main/scala/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/spark/api/java/JavaSparkContext.scala @@ -355,6 +355,13 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork def clearFiles() { sc.clearFiles() } + + /** + * Returns the Hadoop configuration used for the Hadoop code (e.g. file systems) we reuse. + */ + def hadoopConfiguration() { + sc.hadoopConfiguration + } } object JavaSparkContext { From 1a64432ba50904c3933d8a9539a619fc94b3b30b Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Wed, 9 Jan 2013 20:30:36 -0800 Subject: [PATCH 076/291] Indicate success/failure in PySpark test script. --- python/run-tests | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/run-tests b/python/run-tests index da9e24cb1f..fcdd1e27a7 100755 --- a/python/run-tests +++ b/python/run-tests @@ -3,7 +3,24 @@ # Figure out where the Scala framework is installed FWDIR="$(cd `dirname $0`; cd ../; pwd)" +FAILED=0 + $FWDIR/pyspark pyspark/rdd.py +FAILED=$(($?||$FAILED)) + $FWDIR/pyspark -m doctest pyspark/broadcast.py +FAILED=$(($?||$FAILED)) + +if [[ $FAILED != 0 ]]; then + echo -en "\033[31m" # Red + echo "Had test failures; see logs." + echo -en "\033[0m" # No color + exit -1 +else + echo -en "\033[32m" # Green + echo "Tests passed." + echo -en "\033[0m" # No color +fi # TODO: in the long-run, it would be nice to use a test runner like `nose`. +# The doctest fixtures are the current barrier to doing this. From d55f2b98822faa7d71f5fce2bfa980f8265e0610 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Wed, 9 Jan 2013 21:21:23 -0800 Subject: [PATCH 077/291] Use take() instead of takeSample() in PySpark kmeans example. This is a temporary change until we port takeSample(). 
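Stepping back to patch 075 above: the point of the new SparkContext.hadoopConfiguration field is that Hadoop input methods now derive their JobConf from one shared, pre-configured Configuration instead of building a fresh one per call. The sketch below is a standalone illustration of that reuse, not Spark code; the buffer-size setting mirrors what the patch moves out of hadoopFile and into the shared configuration.

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.mapred.JobConf

    // A context-wide configuration, populated once.
    val hadoopConfiguration = new Configuration()
    hadoopConfiguration.set("io.file.buffer.size", System.getProperty("spark.buffer.size", "65536"))

    // Each input method clones the shared settings instead of starting from an empty JobConf.
    val jobConf = new JobConf(hadoopConfiguration)
    println("io.file.buffer.size = " + jobConf.get("io.file.buffer.size"))
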
--- python/examples/kmeans.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/examples/kmeans.py b/python/examples/kmeans.py index ad2be21178..72cf9f88c6 100644 --- a/python/examples/kmeans.py +++ b/python/examples/kmeans.py @@ -33,7 +33,9 @@ if __name__ == "__main__": K = int(sys.argv[3]) convergeDist = float(sys.argv[4]) - kPoints = data.takeSample(False, K, 34) + # TODO: change this after we port takeSample() + #kPoints = data.takeSample(False, K, 34) + kPoints = data.take(K) tempDist = 1.0 while tempDist > convergeDist: From 9930a95d217045c4c22c2575080a03e4b0fd2426 Mon Sep 17 00:00:00 2001 From: shane-huang Date: Thu, 10 Jan 2013 20:09:34 +0800 Subject: [PATCH 078/291] Modified Patch according to comments --- .../main/scala/spark/network/Connection.scala | 8 ++++---- .../spark/network/ConnectionManager.scala | 9 ++++----- .../spark/network/ConnectionManagerTest.scala | 20 +++++++++++++------ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala/spark/network/Connection.scala b/core/src/main/scala/spark/network/Connection.scala index 95096fd0ba..c193bf7c8d 100644 --- a/core/src/main/scala/spark/network/Connection.scala +++ b/core/src/main/scala/spark/network/Connection.scala @@ -136,10 +136,10 @@ extends Connection(SocketChannel.open, selector_) { if (chunk.isDefined) { messages += message // this is probably incorrect, it wont work as fifo if (!message.started) { - logDebug("Starting to send [" + message + "]") - message.started = true - message.startTime = System.currentTimeMillis - } + logDebug("Starting to send [" + message + "]") + message.started = true + message.startTime = System.currentTimeMillis + } return chunk } else { /*logInfo("Finished sending [" + message + "] to [" + remoteConnectionManagerId + "]")*/ diff --git a/core/src/main/scala/spark/network/ConnectionManager.scala b/core/src/main/scala/spark/network/ConnectionManager.scala index e7bd2d3bbd..36c01ad629 100644 --- a/core/src/main/scala/spark/network/ConnectionManager.scala +++ b/core/src/main/scala/spark/network/ConnectionManager.scala @@ -43,12 +43,12 @@ private[spark] class ConnectionManager(port: Int) extends Logging { } val selector = SelectorProvider.provider.openSelector() - val handleMessageExecutor = Executors.newFixedThreadPool(20) + val handleMessageExecutor = Executors.newFixedThreadPool(System.getProperty("spark.core.connection.handler.threads","20").toInt) val serverChannel = ServerSocketChannel.open() val connectionsByKey = new HashMap[SelectionKey, Connection] with SynchronizedMap[SelectionKey, Connection] val connectionsById = new HashMap[ConnectionManagerId, SendingConnection] with SynchronizedMap[ConnectionManagerId, SendingConnection] val messageStatuses = new HashMap[Int, MessageStatus] - val connectionRequests = new HashMap[ConnectionManagerId, SendingConnection] with SynchronizedMap[ConnectionManagerId, SendingConnection] + val connectionRequests = new HashMap[ConnectionManagerId, SendingConnection] with SynchronizedMap[ConnectionManagerId, SendingConnection] val keyInterestChangeRequests = new SynchronizedQueue[(SelectionKey, Int)] val sendMessageRequests = new Queue[(Message, SendingConnection)] @@ -78,9 +78,8 @@ private[spark] class ConnectionManager(port: Int) extends Logging { def run() { try { - while(!selectorThread.isInterrupted) { + while(!selectorThread.isInterrupted) { for( (connectionManagerId, sendingConnection) <- connectionRequests) { - //val sendingConnection = connectionRequests.dequeue sendingConnection.connect() 
addConnection(sendingConnection) connectionRequests -= connectionManagerId @@ -465,7 +464,7 @@ private[spark] object ConnectionManager { val bufferMessage = Message.createBufferMessage(buffer.duplicate) manager.sendMessageReliably(manager.id, bufferMessage) }).foreach(f => { - val g = Await.result(f, 10 second) + val g = Await.result(f, 1 second) if (!g.isDefined) println("Failed") }) val finishTime = System.currentTimeMillis diff --git a/core/src/main/scala/spark/network/ConnectionManagerTest.scala b/core/src/main/scala/spark/network/ConnectionManagerTest.scala index 0e79c518e0..533e4610f3 100644 --- a/core/src/main/scala/spark/network/ConnectionManagerTest.scala +++ b/core/src/main/scala/spark/network/ConnectionManagerTest.scala @@ -13,8 +13,14 @@ import akka.util.duration._ private[spark] object ConnectionManagerTest extends Logging{ def main(args: Array[String]) { - if (args.length < 5) { - println("Usage: ConnectionManagerTest ") + // - the master URL + // - a list slaves to run connectionTest on + //[num of tasks] - the number of parallel tasks to be initiated default is number of slave hosts + //[size of msg in MB (integer)] - the size of messages to be sent in each task, default is 10 + //[count] - how many times to run, default is 3 + //[await time in seconds] : await time (in seconds), default is 600 + if (args.length < 2) { + println("Usage: ConnectionManagerTest [num of tasks] [size of msg in MB (integer)] [count] [await time in seconds)] ") System.exit(1) } @@ -29,14 +35,17 @@ private[spark] object ConnectionManagerTest extends Logging{ /*println("Slaves")*/ /*slaves.foreach(println)*/ - val tasknum = args(2).toInt + val tasknum = if (args.length > 2) args(2).toInt else slaves.length + val size = ( if (args.length > 3) (args(3).toInt) else 10 ) * 1024 * 1024 + val count = if (args.length > 4) args(4).toInt else 3 + val awaitTime = (if (args.length > 5) args(5).toInt else 600 ).second + println("Running "+count+" rounds of test: " + "parallel tasks = " + tasknum + ", msg size = " + size/1024/1024 + " MB, awaitTime = " + awaitTime) val slaveConnManagerIds = sc.parallelize(0 until tasknum, tasknum).map( i => SparkEnv.get.connectionManager.id).collect() println("\nSlave ConnectionManagerIds") slaveConnManagerIds.foreach(println) println - val count = args(4).toInt (0 until count).foreach(i => { val resultStrs = sc.parallelize(0 until tasknum, tasknum).map(i => { val connManager = SparkEnv.get.connectionManager @@ -46,7 +55,6 @@ private[spark] object ConnectionManagerTest extends Logging{ None }) - val size = (args(3).toInt) * 1024 * 1024 val buffer = ByteBuffer.allocate(size).put(Array.tabulate[Byte](size)(x => x.toByte)) buffer.flip @@ -56,7 +64,7 @@ private[spark] object ConnectionManagerTest extends Logging{ logInfo("Sending [" + bufferMessage + "] to [" + slaveConnManagerId + "]") connManager.sendMessageReliably(slaveConnManagerId, bufferMessage) }) - val results = futures.map(f => Await.result(f, 999.second)) + val results = futures.map(f => Await.result(f, awaitTime)) val finishTime = System.currentTimeMillis Thread.sleep(5000) From 49c74ba2af2ab6fe5eda16dbcd35b30b46072a3a Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 10 Jan 2013 07:45:12 -0800 Subject: [PATCH 079/291] Change PYSPARK_PYTHON_EXEC to PYSPARK_PYTHON. 
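Besides parameterising ConnectionManagerTest, patch 078 above makes the ConnectionManager message-handler pool size configurable through a system property rather than the hard-coded 20 introduced in patch 065. The fragment below is a self-contained illustration of that property-with-default pattern; it is not the ConnectionManager itself.

    import java.util.concurrent.Executors

    // Pool size comes from -Dspark.core.connection.handler.threads=..., defaulting to 20.
    val handlerThreads = System.getProperty("spark.core.connection.handler.threads", "20").toInt
    val handleMessageExecutor = Executors.newFixedThreadPool(handlerThreads)

    println("message handler pool size = " + handlerThreads)
    handleMessageExecutor.shutdown()  // clean up the illustration's pool
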
--- python/pyspark/context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 4439356c1f..e486f206b0 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -52,7 +52,7 @@ class SparkContext(object): self._jsc = self.jvm.JavaSparkContext(master, jobName, sparkHome, empty_string_array) - self.pythonExec = os.environ.get("PYSPARK_PYTHON_EXEC", 'python') + self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python') # Broadcast's __reduce__ method stores Broadcast instances here. # This allows other code to determine which Broadcast instances have # been pickled, so it can determine which Java broadcast objects to From b15e8512793475eaeda7225a259db8aacd600741 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Thu, 10 Jan 2013 10:55:41 -0600 Subject: [PATCH 080/291] Check for AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY environment variables. For custom properties, use "spark.hadoop.*" as a prefix instead of just "hadoop.*". --- core/src/main/scala/spark/SparkContext.scala | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 36e0938854..7b11955f1e 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -190,9 +190,16 @@ class SparkContext( /** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. */ val hadoopConfiguration = { val conf = new Configuration() - // Copy any "hadoop.foo=bar" system properties into conf as "foo=bar" - for (key <- System.getProperties.keys.asInstanceOf[Set[String]] if key.startsWith("hadoop.")) { - conf.set(key.substring("hadoop.".length), System.getProperty(key)) + // Explicitly check for S3 environment variables + if (System.getenv("AWS_ACCESS_KEY_ID") != null && System.getenv("AWS_SECRET_ACCESS_KEY") != null) { + conf.set("fs.s3.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID")) + conf.set("fs.s3n.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID")) + conf.set("fs.s3.awsSecretAccessKey", System.getenv("AWS_SECRET_ACCESS_KEY")) + conf.set("fs.s3n.awsSecretAccessKey", System.getenv("AWS_SECRET_ACCESS_KEY")) + } + // Copy any "spark.hadoop.foo=bar" system properties into conf as "foo=bar" + for (key <- System.getProperties.keys.asInstanceOf[Set[String]] if key.startsWith("spark.hadoop.")) { + conf.set(key.substring("spark.hadoop.".length), System.getProperty(key)) } val bufferSize = System.getProperty("spark.buffer.size", "65536") conf.set("io.file.buffer.size", bufferSize) From d1864052c58ff1e58980729f7ccf00e630f815b9 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Thu, 10 Jan 2013 12:16:26 -0600 Subject: [PATCH 081/291] Fix invalid asInstanceOf cast. 
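Patch 080 above forwards AWS credentials and any spark.hadoop.*-prefixed system property into the shared Hadoop configuration, and patch 081, whose diff follows, repairs the property iteration: java.util.Properties cannot simply be cast to a Scala Set, so the loop goes through JavaConversions and toMap instead. The standalone sketch below exercises the corrected iteration with a hypothetical property value.

    import scala.collection.JavaConversions._

    System.setProperty("spark.hadoop.fs.s3n.awsAccessKeyId", "EXAMPLE_KEY_ID")  // hypothetical value

    // The corrected iteration from patch 081: convert Properties to a Scala map, then filter.
    val forwarded = System.getProperties.toMap[String, String].keys
      .filter(_.startsWith("spark.hadoop."))
      .map(key => key.substring("spark.hadoop.".length) -> System.getProperty(key))
      .toMap

    println(forwarded)  // includes fs.s3n.awsAccessKeyId -> EXAMPLE_KEY_ID
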
--- core/src/main/scala/spark/SparkContext.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 7b11955f1e..d2a5b4757a 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -7,6 +7,7 @@ import java.net.{URI, URLClassLoader} import scala.collection.Map import scala.collection.generic.Growable import scala.collection.mutable.{ArrayBuffer, HashMap} +import scala.collection.JavaConversions._ import akka.actor.Actor import akka.actor.Actor._ @@ -198,7 +199,7 @@ class SparkContext( conf.set("fs.s3n.awsSecretAccessKey", System.getenv("AWS_SECRET_ACCESS_KEY")) } // Copy any "spark.hadoop.foo=bar" system properties into conf as "foo=bar" - for (key <- System.getProperties.keys.asInstanceOf[Set[String]] if key.startsWith("spark.hadoop.")) { + for (key <- System.getProperties.toMap[String, String].keys if key.startsWith("spark.hadoop.")) { conf.set(key.substring("spark.hadoop.".length), System.getProperty(key)) } val bufferSize = System.getProperty("spark.buffer.size", "65536") From bd336f5f406386c929f2d1f9aecd7d5190a1a087 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Thu, 10 Jan 2013 17:13:04 -0800 Subject: [PATCH 082/291] Changed CoGroupRDD's hash map from Scala to Java. --- core/src/main/scala/spark/rdd/CoGroupedRDD.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/spark/rdd/CoGroupedRDD.scala index de0d9fad88..2e051c81c8 100644 --- a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoGroupedRDD.scala @@ -1,7 +1,8 @@ package spark.rdd +import java.util.{HashMap => JHashMap} import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap +import scala.collection.JavaConversions._ import spark.{Aggregator, Logging, Partitioner, RDD, SparkEnv, Split, TaskContext} import spark.{Dependency, OneToOneDependency, ShuffleDependency} @@ -71,7 +72,7 @@ class CoGroupedRDD[K](@transient rdds: Seq[RDD[(_, _)]], part: Partitioner) override def compute(s: Split, context: TaskContext): Iterator[(K, Seq[Seq[_]])] = { val split = s.asInstanceOf[CoGroupSplit] val numRdds = split.deps.size - val map = new HashMap[K, Seq[ArrayBuffer[Any]]] + val map = new JHashMap[K, Seq[ArrayBuffer[Any]]] def getSeq(k: K): Seq[ArrayBuffer[Any]] = { map.getOrElseUpdate(k, Array.fill(numRdds)(new ArrayBuffer[Any])) } From 2e914d99835487e867cac6add8be1dbd80dc693f Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Thu, 10 Jan 2013 19:13:08 -0800 Subject: [PATCH 083/291] Formatting --- core/src/main/scala/spark/deploy/master/MasterWebUI.scala | 5 +++-- core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/spark/deploy/master/MasterWebUI.scala b/core/src/main/scala/spark/deploy/master/MasterWebUI.scala index a96b55d6f3..580014ef3f 100644 --- a/core/src/main/scala/spark/deploy/master/MasterWebUI.scala +++ b/core/src/main/scala/spark/deploy/master/MasterWebUI.scala @@ -8,11 +8,12 @@ import akka.util.duration._ import cc.spray.Directives import cc.spray.directives._ import cc.spray.typeconversion.TwirlSupport._ -import spark.deploy._ import cc.spray.http.MediaTypes -import JsonProtocol._ import cc.spray.typeconversion.SprayJsonSupport._ +import spark.deploy._ +import spark.deploy.JsonProtocol._ + private[spark] class 
MasterWebUI(val actorSystem: ActorSystem, master: ActorRef) extends Directives { val RESOURCE_DIR = "spark/deploy/master/webui" diff --git a/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala b/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala index 84b6c16bd6..f9489d99fc 100644 --- a/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala +++ b/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala @@ -7,11 +7,12 @@ import akka.util.Timeout import akka.util.duration._ import cc.spray.Directives import cc.spray.typeconversion.TwirlSupport._ -import spark.deploy.{JsonProtocol, WorkerState, RequestWorkerState} import cc.spray.http.MediaTypes -import JsonProtocol._ import cc.spray.typeconversion.SprayJsonSupport._ +import spark.deploy.{WorkerState, RequestWorkerState} +import spark.deploy.JsonProtocol._ + private[spark] class WorkerWebUI(val actorSystem: ActorSystem, worker: ActorRef) extends Directives { val RESOURCE_DIR = "spark/deploy/worker/webui" From 92625223066a5c28553d7710c6b14af56f64b560 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Thu, 10 Jan 2013 22:07:34 -0800 Subject: [PATCH 084/291] Activate hadoop2 profile in pom.xml with -Dhadoop=2 --- bagel/pom.xml | 6 ++++++ core/pom.xml | 6 ++++++ examples/pom.xml | 6 ++++++ pom.xml | 6 ++++++ repl-bin/pom.xml | 6 ++++++ repl/pom.xml | 6 ++++++ 6 files changed, 36 insertions(+) diff --git a/bagel/pom.xml b/bagel/pom.xml index 85b2077026..c3461fb889 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -77,6 +77,12 @@ hadoop2 + + + hadoop + 2 + + org.spark-project diff --git a/core/pom.xml b/core/pom.xml index 005d8fe498..c8ff625774 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -216,6 +216,12 @@ hadoop2 + + + hadoop + 2 + + org.apache.hadoop diff --git a/examples/pom.xml b/examples/pom.xml index 3f738a3f8c..d0b1e97747 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -77,6 +77,12 @@ hadoop2 + + + hadoop + 2 + + org.spark-project diff --git a/pom.xml b/pom.xml index ea5b9c9d05..ae87813d4e 100644 --- a/pom.xml +++ b/pom.xml @@ -502,6 +502,12 @@ hadoop2 + + + hadoop + 2 + + 2 diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index fecb01f3cd..54ae20659e 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -115,6 +115,12 @@ hadoop2 + + + hadoop + 2 + + hadoop2 diff --git a/repl/pom.xml b/repl/pom.xml index 04b2c35beb..3e979b93a6 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -121,6 +121,12 @@ hadoop2 + + + hadoop + 2 + + hadoop2 From 3e6519a36e354f3623c5b968efe5217c7fcb242f Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Fri, 11 Jan 2013 11:24:20 -0600 Subject: [PATCH 085/291] Use hadoopConfiguration for default JobConf in PairRDDFunctions. 
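The PairRDDFunctions patch that follows makes the default JobConf wrap the SparkContext's hadoopConfiguration, so settings such as the S3 credentials configured earlier reach Hadoop output jobs without being passed explicitly. A hedged standalone illustration that a JobConf built from an existing Configuration inherits its entries (assumes the Hadoop client libraries are on the classpath; not Spark code):

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapred.JobConf

object JobConfInherits {
  def main(args: Array[String]) {
    val base = new Configuration()
    base.set("fs.s3n.awsAccessKeyId", "MY_KEY_ID")  // hypothetical value
    val conf = new JobConf(base)                    // copies base's entries
    println(conf.get("fs.s3n.awsAccessKeyId"))      // prints MY_KEY_ID
  }
}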
--- core/src/main/scala/spark/PairRDDFunctions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala index ce48cea903..51c15837c4 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/spark/PairRDDFunctions.scala @@ -557,7 +557,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( keyClass: Class[_], valueClass: Class[_], outputFormatClass: Class[_ <: OutputFormat[_, _]], - conf: JobConf = new JobConf) { + conf: JobConf = new JobConf(self.context.hadoopConfiguration)) { conf.setOutputKeyClass(keyClass) conf.setOutputValueClass(valueClass) // conf.setOutputFormat(outputFormatClass) // Doesn't work in Scala 2.9 due to what may be a generics bug From 5c7a1272198c88a90a843bbda0c1424f92b7c12e Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Fri, 11 Jan 2013 11:25:11 -0600 Subject: [PATCH 086/291] Pass a new Configuration that wraps the default hadoopConfiguration. --- core/src/main/scala/spark/SparkContext.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index d2a5b4757a..f6b98c41bc 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -294,7 +294,7 @@ class SparkContext( fm.erasure.asInstanceOf[Class[F]], km.erasure.asInstanceOf[Class[K]], vm.erasure.asInstanceOf[Class[V]], - hadoopConfiguration) + new Configuration(hadoopConfiguration)) } /** From 480c4139bbd2711e99f3a819c9ef164d8b3dcac0 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Fri, 11 Jan 2013 11:24:48 -0600 Subject: [PATCH 087/291] add repositories section to simple job pom.xml --- docs/quick-start.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/quick-start.md b/docs/quick-start.md index 177cb14551..d46dc2da3f 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -200,6 +200,16 @@ To build the job, we also write a Maven `pom.xml` file that lists Spark as a dep Simple Project jar 1.0 + + + Spray.cc repository + http://repo.spray.cc + + + Typesafe repository + http://repo.typesafe.com/typesafe/releases + + org.spark-project From c063e8777ebaeb04056889064e9264edc019edbd Mon Sep 17 00:00:00 2001 From: Tyson Date: Fri, 11 Jan 2013 14:57:38 -0500 Subject: [PATCH 088/291] Added implicit json writers for JobDescription and ExecutorRunner --- .../scala/spark/deploy/JsonProtocol.scala | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/deploy/JsonProtocol.scala b/core/src/main/scala/spark/deploy/JsonProtocol.scala index f14f804b3a..732fa08064 100644 --- a/core/src/main/scala/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/spark/deploy/JsonProtocol.scala @@ -1,6 +1,7 @@ package spark.deploy import master.{JobInfo, WorkerInfo} +import worker.ExecutorRunner import cc.spray.json._ /** @@ -30,6 +31,24 @@ private[spark] object JsonProtocol extends DefaultJsonProtocol { "submitdate" -> JsString(obj.submitDate.toString)) } + implicit object JobDescriptionJsonFormat extends RootJsonWriter[JobDescription] { + def write(obj: JobDescription) = JsObject( + "name" -> JsString(obj.name), + "cores" -> JsNumber(obj.cores), + "memoryperslave" -> JsNumber(obj.memoryPerSlave), + "user" -> JsString(obj.user) + ) + } + + implicit object ExecutorRunnerJsonFormat extends RootJsonWriter[ExecutorRunner] { + def write(obj: 
ExecutorRunner) = JsObject( + "id" -> JsNumber(obj.execId), + "memory" -> JsNumber(obj.memory), + "jobid" -> JsString(obj.jobId), + "jobdesc" -> obj.jobDesc.toJson.asJsObject + ) + } + implicit object MasterStateJsonFormat extends RootJsonWriter[MasterState] { def write(obj: MasterState) = JsObject( "url" -> JsString("spark://" + obj.uri), @@ -51,7 +70,9 @@ private[spark] object JsonProtocol extends DefaultJsonProtocol { "cores" -> JsNumber(obj.cores), "coresused" -> JsNumber(obj.coresUsed), "memory" -> JsNumber(obj.memory), - "memoryused" -> JsNumber(obj.memoryUsed) + "memoryused" -> JsNumber(obj.memoryUsed), + "executors" -> JsArray(obj.executors.toList.map(_.toJson)), + "finishedexecutors" -> JsArray(obj.finishedExecutors.toList.map(_.toJson)) ) } } From 1731f1fed4f1369662b1a9fde850a3dcba738a59 Mon Sep 17 00:00:00 2001 From: Tyson Date: Fri, 11 Jan 2013 15:01:43 -0500 Subject: [PATCH 089/291] Added an optional format parameter for individual job queries and optimized the jobId query --- .../spark/deploy/master/MasterWebUI.scala | 38 +++++++++++++------ 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/spark/deploy/master/MasterWebUI.scala b/core/src/main/scala/spark/deploy/master/MasterWebUI.scala index 580014ef3f..458ee2d665 100644 --- a/core/src/main/scala/spark/deploy/master/MasterWebUI.scala +++ b/core/src/main/scala/spark/deploy/master/MasterWebUI.scala @@ -38,20 +38,36 @@ class MasterWebUI(val actorSystem: ActorSystem, master: ActorRef) extends Direct } } ~ path("job") { - parameter("jobId") { jobId => - completeWith { + parameters("jobId", 'format ?) { + case (jobId, Some(js)) if (js.equalsIgnoreCase("json")) => val future = master ? RequestMasterState - future.map { state => - val masterState = state.asInstanceOf[MasterState] - - // A bit ugly an inefficient, but we won't have a number of jobs - // so large that it will make a significant difference. - (masterState.activeJobs ++ masterState.completedJobs).find(_.id == jobId) match { - case Some(job) => spark.deploy.master.html.job_details.render(job) - case _ => null + val jobInfo = for (masterState <- future.mapTo[MasterState]) yield { + masterState.activeJobs.find(_.id == jobId) match { + case Some(job) => job + case _ => masterState.completedJobs.find(_.id == jobId) match { + case Some(job) => job + case _ => null + } + } + } + respondWithMediaType(MediaTypes.`application/json`) { ctx => + ctx.complete(jobInfo.mapTo[JobInfo]) + } + case (jobId, _) => + completeWith { + val future = master ? 
RequestMasterState + future.map { state => + val masterState = state.asInstanceOf[MasterState] + + masterState.activeJobs.find(_.id == jobId) match { + case Some(job) => spark.deploy.master.html.job_details.render(job) + case _ => masterState.completedJobs.find(_.id == jobId) match { + case Some(job) => spark.deploy.master.html.job_details.render(job) + case _ => null + } + } } } - } } } ~ pathPrefix("static") { From bbc56d85ed4eb4c3a09b20d5457f704f4b8a70c4 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Sat, 12 Jan 2013 15:24:13 -0800 Subject: [PATCH 090/291] Rename environment variable for hadoop profiles to hadoopVersion --- bagel/pom.xml | 4 ++-- core/pom.xml | 4 ++-- examples/pom.xml | 4 ++-- pom.xml | 5 +++-- repl-bin/pom.xml | 4 ++-- repl/pom.xml | 4 ++-- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/bagel/pom.xml b/bagel/pom.xml index c3461fb889..5f58347204 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -47,7 +47,7 @@ hadoop1 - !hadoop + !hadoopVersion @@ -79,7 +79,7 @@ hadoop2 - hadoop + hadoopVersion 2 diff --git a/core/pom.xml b/core/pom.xml index c8ff625774..ad9fdcde2c 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -161,7 +161,7 @@ hadoop1 - !hadoop + !hadoopVersion @@ -218,7 +218,7 @@ hadoop2 - hadoop + hadoopVersion 2 diff --git a/examples/pom.xml b/examples/pom.xml index d0b1e97747..3355deb6b7 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -47,7 +47,7 @@ hadoop1 - !hadoop + !hadoopVersion @@ -79,7 +79,7 @@ hadoop2 - hadoop + hadoopVersion 2 diff --git a/pom.xml b/pom.xml index ae87813d4e..8f1af673a3 100644 --- a/pom.xml +++ b/pom.xml @@ -483,9 +483,10 @@ hadoop1 - !hadoop + !hadoopVersion + 1 @@ -504,7 +505,7 @@ hadoop2 - hadoop + hadoopVersion 2 diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index 54ae20659e..da91c0f3ab 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -72,7 +72,7 @@ hadoop1 - !hadoop + !hadoopVersion @@ -117,7 +117,7 @@ hadoop2 - hadoop + hadoopVersion 2 diff --git a/repl/pom.xml b/repl/pom.xml index 3e979b93a6..38e883c7f8 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -74,7 +74,7 @@ hadoop1 - !hadoop + !hadoopVersion @@ -123,7 +123,7 @@ hadoop2 - hadoop + hadoopVersion 2 From ba06e9c97cc3f8723ffdc3895182c529d3bb2fb3 Mon Sep 17 00:00:00 2001 From: Eric Zhang Date: Sun, 13 Jan 2013 15:33:11 +0800 Subject: [PATCH 091/291] Update examples/src/main/scala/spark/examples/LocalLR.scala MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix spelling mistake --- examples/src/main/scala/spark/examples/LocalLR.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/src/main/scala/spark/examples/LocalLR.scala b/examples/src/main/scala/spark/examples/LocalLR.scala index f2ac2b3e06..9553162004 100644 --- a/examples/src/main/scala/spark/examples/LocalLR.scala +++ b/examples/src/main/scala/spark/examples/LocalLR.scala @@ -5,7 +5,7 @@ import spark.util.Vector object LocalLR { val N = 10000 // Number of data points - val D = 10 // Numer of dimensions + val D = 10 // Number of dimensions val R = 0.7 // Scaling factor val ITERATIONS = 5 val rand = new Random(42) From 88d8f11365db84d46ff456495c07f664c91d1896 Mon Sep 17 00:00:00 2001 From: Mikhail Bautin Date: Sun, 13 Jan 2013 00:45:52 -0800 Subject: [PATCH 092/291] Add missing dependency spray-json to Maven build --- core/pom.xml | 4 ++++ pom.xml | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/core/pom.xml b/core/pom.xml index ad9fdcde2c..862d3ec37a 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -71,6 
+71,10 @@ cc.spray spray-server + + cc.spray + spray-json_${scala.version} + org.tomdz.twirl twirl-api diff --git a/pom.xml b/pom.xml index 8f1af673a3..751189a9d8 100644 --- a/pom.xml +++ b/pom.xml @@ -54,6 +54,7 @@ 0.9.0-incubating 2.0.3 1.0-M2.1 + 1.1.1 1.6.1 4.1.2 @@ -222,6 +223,11 @@ spray-server ${spray.version} + + cc.spray + spray-json_${scala.version} + ${spray.json.version} + org.tomdz.twirl twirl-api From be7166146bf5692369272b85622d5316eccfd8e6 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sun, 13 Jan 2013 15:27:28 -0800 Subject: [PATCH 093/291] Removed the use of getOrElse to avoid Scala wrapper for every call. --- core/src/main/scala/spark/rdd/CoGroupedRDD.scala | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/spark/rdd/CoGroupedRDD.scala index 2e051c81c8..ce5f171911 100644 --- a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoGroupedRDD.scala @@ -1,8 +1,8 @@ package spark.rdd import java.util.{HashMap => JHashMap} +import scala.collection.JavaConversions import scala.collection.mutable.ArrayBuffer -import scala.collection.JavaConversions._ import spark.{Aggregator, Logging, Partitioner, RDD, SparkEnv, Split, TaskContext} import spark.{Dependency, OneToOneDependency, ShuffleDependency} @@ -74,7 +74,14 @@ class CoGroupedRDD[K](@transient rdds: Seq[RDD[(_, _)]], part: Partitioner) val numRdds = split.deps.size val map = new JHashMap[K, Seq[ArrayBuffer[Any]]] def getSeq(k: K): Seq[ArrayBuffer[Any]] = { - map.getOrElseUpdate(k, Array.fill(numRdds)(new ArrayBuffer[Any])) + val seq = map.get(k) + if (seq != null) { + seq + } else { + val seq = Array.fill(numRdds)(new ArrayBuffer[Any]) + map.put(k, seq) + seq + } } for ((dep, depNum) <- split.deps.zipWithIndex) dep match { case NarrowCoGroupSplitDep(rdd, itsSplit) => { @@ -94,6 +101,6 @@ class CoGroupedRDD[K](@transient rdds: Seq[RDD[(_, _)]], part: Partitioner) fetcher.fetch[K, Seq[Any]](shuffleId, split.index).foreach(mergePair) } } - map.iterator + JavaConversions.mapAsScalaMap(map).iterator } } From 72408e8dfacc24652f376d1ee4dd6f04edb54804 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 13 Jan 2013 19:34:07 -0800 Subject: [PATCH 094/291] Make filter preserve partitioner info, since it can --- core/src/main/scala/spark/rdd/FilteredRDD.scala | 3 ++- core/src/test/scala/spark/PartitioningSuite.scala | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/rdd/FilteredRDD.scala b/core/src/main/scala/spark/rdd/FilteredRDD.scala index b148da28de..d46549b8b6 100644 --- a/core/src/main/scala/spark/rdd/FilteredRDD.scala +++ b/core/src/main/scala/spark/rdd/FilteredRDD.scala @@ -7,5 +7,6 @@ private[spark] class FilteredRDD[T: ClassManifest](prev: RDD[T], f: T => Boolean) extends RDD[T](prev.context) { override def splits = prev.splits override val dependencies = List(new OneToOneDependency(prev)) + override val partitioner = prev.partitioner // Since filter cannot change a partition's keys override def compute(split: Split, context: TaskContext) = prev.iterator(split, context).filter(f) -} \ No newline at end of file +} diff --git a/core/src/test/scala/spark/PartitioningSuite.scala b/core/src/test/scala/spark/PartitioningSuite.scala index f09b602a7b..eb3c8f238f 100644 --- a/core/src/test/scala/spark/PartitioningSuite.scala +++ b/core/src/test/scala/spark/PartitioningSuite.scala @@ -106,6 +106,11 @@ class PartitioningSuite extends FunSuite with 
BeforeAndAfter { assert(grouped2.leftOuterJoin(reduced2).partitioner === grouped2.partitioner) assert(grouped2.rightOuterJoin(reduced2).partitioner === grouped2.partitioner) assert(grouped2.cogroup(reduced2).partitioner === grouped2.partitioner) + + assert(grouped2.map(_ => 1).partitioner === None) + assert(grouped2.mapValues(_ => 1).partitioner === grouped2.partitioner) + assert(grouped2.flatMapValues(_ => Seq(1)).partitioner === grouped2.partitioner) + assert(grouped2.filter(_._1 > 4).partitioner === grouped2.partitioner) } test("partitioning Java arrays should fail") { From 273fb5cc109ac0a032f84c1566ae908cd0eb27b6 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Thu, 3 Jan 2013 14:09:56 -0800 Subject: [PATCH 095/291] Throw FetchFailedException for cached missing locs --- .../main/scala/spark/MapOutputTracker.scala | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/spark/MapOutputTracker.scala b/core/src/main/scala/spark/MapOutputTracker.scala index 70eb9f702e..9f2aa76830 100644 --- a/core/src/main/scala/spark/MapOutputTracker.scala +++ b/core/src/main/scala/spark/MapOutputTracker.scala @@ -139,8 +139,8 @@ private[spark] class MapOutputTracker(actorSystem: ActorSystem, isMaster: Boolea case e: InterruptedException => } } - return mapStatuses.get(shuffleId).map(status => - (status.address, MapOutputTracker.decompressSize(status.compressedSizes(reduceId)))) + return MapOutputTracker.convertMapStatuses(shuffleId, reduceId, + mapStatuses.get(shuffleId)) } else { fetching += shuffleId } @@ -156,21 +156,15 @@ private[spark] class MapOutputTracker(actorSystem: ActorSystem, isMaster: Boolea fetchedStatuses = deserializeStatuses(fetchedBytes) logInfo("Got the output locations") mapStatuses.put(shuffleId, fetchedStatuses) - if (fetchedStatuses.contains(null)) { - throw new FetchFailedException(null, shuffleId, -1, reduceId, - new Exception("Missing an output location for shuffle " + shuffleId)) - } } finally { fetching.synchronized { fetching -= shuffleId fetching.notifyAll() } } - return fetchedStatuses.map(s => - (s.address, MapOutputTracker.decompressSize(s.compressedSizes(reduceId)))) + return MapOutputTracker.convertMapStatuses(shuffleId, reduceId, fetchedStatuses) } else { - return statuses.map(s => - (s.address, MapOutputTracker.decompressSize(s.compressedSizes(reduceId)))) + return MapOutputTracker.convertMapStatuses(shuffleId, reduceId, statuses) } } @@ -258,6 +252,28 @@ private[spark] class MapOutputTracker(actorSystem: ActorSystem, isMaster: Boolea private[spark] object MapOutputTracker { private val LOG_BASE = 1.1 + // Convert an array of MapStatuses to locations and sizes for a given reduce ID. If + // any of the statuses is null (indicating a missing location due to a failed mapper), + // throw a FetchFailedException. + def convertMapStatuses( + shuffleId: Int, + reduceId: Int, + statuses: Array[MapStatus]): Array[(BlockManagerId, Long)] = { + if (statuses == null) { + throw new FetchFailedException(null, shuffleId, -1, reduceId, + new Exception("Missing all output locations for shuffle " + shuffleId)) + } + statuses.map { + status => + if (status == null) { + throw new FetchFailedException(null, shuffleId, -1, reduceId, + new Exception("Missing an output location for shuffle " + shuffleId)) + } else { + (status.address, decompressSize(status.compressedSizes(reduceId))) + } + } + } + /** * Compress a size in bytes to 8 bits for efficient reporting of map output sizes. 
* We do this by encoding the log base 1.1 of the size as an integer, which can support From 7ba34bc007ec10d12b2a871749f32232cdbc0d9c Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Mon, 14 Jan 2013 15:24:08 -0800 Subject: [PATCH 096/291] Additional tests for MapOutputTracker. --- .../scala/spark/MapOutputTrackerSuite.scala | 82 ++++++++++++++++++- 1 file changed, 80 insertions(+), 2 deletions(-) diff --git a/core/src/test/scala/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/spark/MapOutputTrackerSuite.scala index 5b4b198960..6c6f82e274 100644 --- a/core/src/test/scala/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/spark/MapOutputTrackerSuite.scala @@ -1,12 +1,18 @@ package spark import org.scalatest.FunSuite +import org.scalatest.BeforeAndAfter import akka.actor._ import spark.scheduler.MapStatus import spark.storage.BlockManagerId +import spark.util.AkkaUtils -class MapOutputTrackerSuite extends FunSuite { +class MapOutputTrackerSuite extends FunSuite with BeforeAndAfter { + after { + System.clearProperty("spark.master.port") + } + test("compressSize") { assert(MapOutputTracker.compressSize(0L) === 0) assert(MapOutputTracker.compressSize(1L) === 1) @@ -71,6 +77,78 @@ class MapOutputTrackerSuite extends FunSuite { // The remaining reduce task might try to grab the output dispite the shuffle failure; // this should cause it to fail, and the scheduler will ignore the failure due to the // stage already being aborted. - intercept[Exception] { tracker.getServerStatuses(10, 1) } + intercept[FetchFailedException] { tracker.getServerStatuses(10, 1) } + } + + test("remote fetch") { + val (actorSystem, boundPort) = + AkkaUtils.createActorSystem("test", "localhost", 0) + System.setProperty("spark.master.port", boundPort.toString) + val masterTracker = new MapOutputTracker(actorSystem, true) + val slaveTracker = new MapOutputTracker(actorSystem, false) + masterTracker.registerShuffle(10, 1) + masterTracker.incrementGeneration() + slaveTracker.updateGeneration(masterTracker.getGeneration) + intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } + + val compressedSize1000 = MapOutputTracker.compressSize(1000L) + val size1000 = MapOutputTracker.decompressSize(compressedSize1000) + masterTracker.registerMapOutput(10, 0, new MapStatus( + new BlockManagerId("hostA", 1000), Array(compressedSize1000))) + masterTracker.incrementGeneration() + slaveTracker.updateGeneration(masterTracker.getGeneration) + assert(slaveTracker.getServerStatuses(10, 0).toSeq === + Seq((new BlockManagerId("hostA", 1000), size1000))) + + masterTracker.unregisterMapOutput(10, 0, new BlockManagerId("hostA", 1000)) + masterTracker.incrementGeneration() + slaveTracker.updateGeneration(masterTracker.getGeneration) + intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } + } + + test("simulatenous fetch fails") { + val dummyActorSystem = ActorSystem("testDummy") + val dummyTracker = new MapOutputTracker(dummyActorSystem, true) + dummyTracker.registerShuffle(10, 1) + // val compressedSize1000 = MapOutputTracker.compressSize(1000L) + // val size100 = MapOutputTracker.decompressSize(compressedSize1000) + // dummyTracker.registerMapOutput(10, 0, new MapStatus( + // new BlockManagerId("hostA", 1000), Array(compressedSize1000))) + val serializedMessage = dummyTracker.getSerializedLocations(10) + + val (actorSystem, boundPort) = + AkkaUtils.createActorSystem("test", "localhost", 0) + System.setProperty("spark.master.port", boundPort.toString) + val delayResponseLock = new 
java.lang.Object + val delayResponseActor = actorSystem.actorOf(Props(new Actor { + override def receive = { + case GetMapOutputStatuses(shuffleId: Int, requester: String) => + delayResponseLock.synchronized { + sender ! serializedMessage + } + } + }), name = "MapOutputTracker") + val slaveTracker = new MapOutputTracker(actorSystem, false) + var firstFailed = false + var secondFailed = false + val firstFetch = new Thread { + override def run() { + intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } + firstFailed = true + } + } + val secondFetch = new Thread { + override def run() { + intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } + secondFailed = true + } + } + delayResponseLock.synchronized { + firstFetch.start + secondFetch.start + } + firstFetch.join + secondFetch.join + assert(firstFailed && secondFailed) } } From b61a4ec77300d6e7fb40f771a9054ae8bc4488de Mon Sep 17 00:00:00 2001 From: seanm Date: Mon, 14 Jan 2013 17:13:10 -0700 Subject: [PATCH 097/291] Removing offset management code that is non-existent in kafka 0.7.0+ --- .../scala/spark/streaming/dstream/KafkaInputDStream.scala | 7 ------- 1 file changed, 7 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala index 2b4740bdf7..9605072382 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala @@ -173,13 +173,6 @@ class KafkaReceiver(host: String, port: Int, groupId: String, stream.takeWhile { msgAndMetadata => blockGenerator += msgAndMetadata.message - // Updating the offet. The key is (broker, topic, group, partition). 
- val key = KafkaPartitionKey(msgAndMetadata.topicInfo.brokerId, msgAndMetadata.topic, - groupId, msgAndMetadata.topicInfo.partition.partId) - val offset = msgAndMetadata.topicInfo.getConsumeOffset - offsets.put(key, offset) - // logInfo("Handled message: " + (key, offset).toString) - // Keep on handling messages true } From c203a292963a018bd9b84f02bb522fd191a110af Mon Sep 17 00:00:00 2001 From: seanm Date: Mon, 14 Jan 2013 17:22:03 -0700 Subject: [PATCH 098/291] StateDStream changes to give updateStateByKey consistent behavior --- .../scala/spark/streaming/dstream/StateDStream.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/dstream/StateDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/StateDStream.scala index a1ec2f5454..4e57968eed 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/StateDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/StateDStream.scala @@ -48,8 +48,16 @@ class StateDStream[K: ClassManifest, V: ClassManifest, S <: AnyRef : ClassManife //logDebug("Generating state RDD for time " + validTime) return Some(stateRDD) } - case None => { // If parent RDD does not exist, then return old state RDD - return Some(prevStateRDD) + case None => { // If parent RDD does not exist + + // Re-apply the update function to the old state RDD + val updateFuncLocal = updateFunc + val finalFunc = (iterator: Iterator[(K, S)]) => { + val i = iterator.map(t => (t._1, Seq[V](), Option(t._2))) + updateFuncLocal(i) + } + val stateRDD = prevStateRDD.mapPartitions(finalFunc, preservePartitioning) + return Some(stateRDD) } } } From b0389997972d383c3aaa87924b725dee70b18d8e Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Mon, 14 Jan 2013 17:04:44 -0800 Subject: [PATCH 099/291] Fix accidental spark.master.host reuse --- core/src/test/scala/spark/MapOutputTrackerSuite.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/test/scala/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/spark/MapOutputTrackerSuite.scala index 6c6f82e274..aa1d8ac7e6 100644 --- a/core/src/test/scala/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/spark/MapOutputTrackerSuite.scala @@ -81,6 +81,7 @@ class MapOutputTrackerSuite extends FunSuite with BeforeAndAfter { } test("remote fetch") { + System.clearProperty("spark.master.host") val (actorSystem, boundPort) = AkkaUtils.createActorSystem("test", "localhost", 0) System.setProperty("spark.master.port", boundPort.toString) @@ -107,6 +108,7 @@ class MapOutputTrackerSuite extends FunSuite with BeforeAndAfter { } test("simulatenous fetch fails") { + System.clearProperty("spark.master.host") val dummyActorSystem = ActorSystem("testDummy") val dummyTracker = new MapOutputTracker(dummyActorSystem, true) dummyTracker.registerShuffle(10, 1) From b77f7390a5a18c2b88fbc0c276c4dbc938560127 Mon Sep 17 00:00:00 2001 From: Nick Pentreath Date: Tue, 15 Jan 2013 09:04:32 +0200 Subject: [PATCH 100/291] Python ALS example --- python/examples/als.py | 71 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100755 python/examples/als.py diff --git a/python/examples/als.py b/python/examples/als.py new file mode 100755 index 0000000000..284cf0d3a2 --- /dev/null +++ b/python/examples/als.py @@ -0,0 +1,71 @@ +""" +This example requires numpy (http://www.numpy.org/) +""" +from os.path import realpath +import sys + +import numpy as np +from numpy.random import rand +from numpy import matrix +from pyspark import 
SparkContext + +LAMBDA = 0.01 # regularization +np.random.seed(42) + +def rmse(R, ms, us): + diff = R - ms * us.T + return np.sqrt(np.sum(np.power(diff, 2)) / M * U) + +def update(i, vec, mat, ratings): + uu = mat.shape[0] + ff = mat.shape[1] + XtX = matrix(np.zeros((ff, ff))) + Xty = np.zeros((ff, 1)) + + for j in range(uu): + v = mat[j, :] + XtX += v.T * v + Xty += v.T * ratings[i, j] + XtX += np.eye(ff, ff) * LAMBDA * uu + return np.linalg.solve(XtX, Xty) + +if __name__ == "__main__": + if len(sys.argv) < 2: + print >> sys.stderr, \ + "Usage: PythonALS " + exit(-1) + sc = SparkContext(sys.argv[1], "PythonALS", pyFiles=[realpath(__file__)]) + M = int(sys.argv[2]) if len(sys.argv) > 2 else 100 + U = int(sys.argv[3]) if len(sys.argv) > 3 else 500 + F = int(sys.argv[4]) if len(sys.argv) > 4 else 10 + ITERATIONS = int(sys.argv[5]) if len(sys.argv) > 5 else 5 + slices = int(sys.argv[6]) if len(sys.argv) > 6 else 2 + + print "Running ALS with M=%d, U=%d, F=%d, iters=%d, slices=%d\n" % \ + (M, U, F, ITERATIONS, slices) + + R = matrix(rand(M, F)) * matrix(rand(U, F).T) + ms = matrix(rand(M ,F)) + us = matrix(rand(U, F)) + + Rb = sc.broadcast(R) + msb = sc.broadcast(ms) + usb = sc.broadcast(us) + + for i in range(ITERATIONS): + ms = sc.parallelize(range(M), slices) \ + .map(lambda x: update(x, msb.value[x, :], usb.value, Rb.value)) \ + .collect() + ms = matrix(np.array(ms)[:, :, 0]) # collect() returns a list, so array ends up being + # a 3-d array, we take the first 2 dims for the matrix + msb = sc.broadcast(ms) + + us = sc.parallelize(range(U), slices) \ + .map(lambda x: update(x, usb.value[x, :], msb.value, Rb.value.T)) \ + .collect() + us = matrix(np.array(us)[:, :, 0]) + usb = sc.broadcast(us) + + error = rmse(R, ms, us) + print "Iteration %d:" % i + print "\nRMSE: %5.4f\n" % error \ No newline at end of file From dd583b7ebf0e6620ec8e35424b59db451febe3e8 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 15 Jan 2013 10:52:06 -0600 Subject: [PATCH 101/291] Call executeOnCompleteCallbacks in a finally block. --- core/src/main/scala/spark/scheduler/ResultTask.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/ResultTask.scala b/core/src/main/scala/spark/scheduler/ResultTask.scala index e492279b4e..2aad7956b4 100644 --- a/core/src/main/scala/spark/scheduler/ResultTask.scala +++ b/core/src/main/scala/spark/scheduler/ResultTask.scala @@ -15,9 +15,11 @@ private[spark] class ResultTask[T, U]( override def run(attemptId: Long): U = { val context = new TaskContext(stageId, partition, attemptId) - val result = func(context, rdd.iterator(split, context)) - context.executeOnCompleteCallbacks() - result + try { + func(context, rdd.iterator(split, context)) + } finally { + context.executeOnCompleteCallbacks() + } } override def preferredLocations: Seq[String] = locs From d228bff440395e8e6b8d67483467dde65b08ab40 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 15 Jan 2013 11:48:50 -0600 Subject: [PATCH 102/291] Add a test. 
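The ResultTask change above runs the task body inside try and moves executeOnCompleteCallbacks into a finally block, so completion callbacks fire even when the body throws; the test added in the next patch exercises exactly this path. A minimal sketch of why the finally block matters, with hypothetical names rather than Spark's classes:

object CallbackOnFailure {
  def runTask(body: => String, onComplete: () => Unit): String = {
    try {
      body
    } finally {
      onComplete()  // runs whether body returns normally or throws
    }
  }

  def main(args: Array[String]) {
    var completed = false
    try {
      runTask(sys.error("failed"), () => completed = true)
    } catch {
      case e: RuntimeException => ()
    }
    println("completed = " + completed)  // prints completed = true
  }
}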
--- .../spark/scheduler/TaskContextSuite.scala | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 core/src/test/scala/spark/scheduler/TaskContextSuite.scala diff --git a/core/src/test/scala/spark/scheduler/TaskContextSuite.scala b/core/src/test/scala/spark/scheduler/TaskContextSuite.scala new file mode 100644 index 0000000000..f937877340 --- /dev/null +++ b/core/src/test/scala/spark/scheduler/TaskContextSuite.scala @@ -0,0 +1,43 @@ +package spark.scheduler + +import org.scalatest.FunSuite +import org.scalatest.BeforeAndAfter +import spark.TaskContext +import spark.RDD +import spark.SparkContext +import spark.Split + +class TaskContextSuite extends FunSuite with BeforeAndAfter { + + var sc: SparkContext = _ + + after { + if (sc != null) { + sc.stop() + sc = null + } + // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown + System.clearProperty("spark.master.port") + } + + test("Calls executeOnCompleteCallbacks after failure") { + var completed = false + sc = new SparkContext("local", "test") + val rdd = new RDD[String](sc) { + override val splits = Array[Split](StubSplit(0)) + override val dependencies = List() + override def compute(split: Split, context: TaskContext) = { + context.addOnCompleteCallback(() => completed = true) + sys.error("failed") + } + } + val func = (c: TaskContext, i: Iterator[String]) => i.next + val task = new ResultTask[String, String](0, rdd, func, 0, Seq(), 0) + intercept[RuntimeException] { + task.run(0) + } + assert(completed === true) + } + + case class StubSplit(val index: Int) extends Split +} \ No newline at end of file From c7143e5507f1d5292e678315158d3863c9bb4242 Mon Sep 17 00:00:00 2001 From: Andrew Psaltis Date: Tue, 15 Jan 2013 12:45:42 -0700 Subject: [PATCH 103/291] Changed teh scala version to 2.9.2, so that the classes can be found when the classpath is expanded. --- run2.cmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run2.cmd b/run2.cmd index 83464b1166..67f1e465e4 100644 --- a/run2.cmd +++ b/run2.cmd @@ -1,6 +1,6 @@ @echo off -set SCALA_VERSION=2.9.1 +set SCALA_VERSION=2.9.2 rem Figure out where the Spark framework is installed set FWDIR=%~dp0 From 74d3b23929758328c2a7879381669d81bf899396 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 15 Jan 2013 14:03:28 -0600 Subject: [PATCH 104/291] Add spark.executor.memory to differentiate executor memory from spark-shell memory. 
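The patch below gives the spark.executor.memory system property precedence over the SPARK_MEM environment variable, falling back to 512 MB when neither is set. A rough standalone sketch of that precedence chain; unlike Spark's Utils.memoryStringToMb it only accepts a plain megabyte count, an assumption made to keep the example self-contained:

object ExecutorMemory {
  def main(args: Array[String]) {
    System.setProperty("spark.executor.memory", "1024")  // hypothetical setting
    val executorMemory = Option(System.getProperty("spark.executor.memory"))
      .orElse(Option(System.getenv("SPARK_MEM")))
      .map(_.toInt)
      .getOrElse(512)
    println(executorMemory + " MB per executor")  // prints 1024 MB per executor
  }
}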
--- core/src/main/scala/spark/SparkContext.scala | 4 ++-- .../scala/spark/deploy/worker/ExecutorRunner.scala | 3 +-- .../cluster/SparkDeploySchedulerBackend.scala | 11 +++++------ 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index bbf8272eb3..a5a1b75944 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -111,8 +111,8 @@ class SparkContext( // Environment variables to pass to our executors private[spark] val executorEnvs = HashMap[String, String]() - for (key <- Seq("SPARK_MEM", "SPARK_CLASSPATH", "SPARK_LIBRARY_PATH", "SPARK_JAVA_OPTS", - "SPARK_TESTING")) { + // Note: SPARK_MEM isn't included because it's set directly in ExecutorRunner + for (key <- Seq("SPARK_CLASSPATH", "SPARK_LIBRARY_PATH", "SPARK_JAVA_OPTS", "SPARK_TESTING")) { val value = System.getenv(key) if (value != null) { executorEnvs(key) = value diff --git a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala index beceb55ecd..2f2ea617ff 100644 --- a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala @@ -118,8 +118,7 @@ private[spark] class ExecutorRunner( for ((key, value) <- jobDesc.command.environment) { env.put(key, value) } - env.put("SPARK_CORES", cores.toString) - env.put("SPARK_MEMORY", memory.toString) + env.put("SPARK_MEM", memory.toString) // In case we are running this from within the Spark Shell, avoid creating a "scala" // parent process for the executor command env.put("SPARK_LAUNCH_WITH_SCALA", "0") diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index e2301347e5..f2fb244b24 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -23,12 +23,11 @@ private[spark] class SparkDeploySchedulerBackend( // Memory used by each executor (in megabytes) val executorMemory = { - if (System.getenv("SPARK_MEM") != null) { - Utils.memoryStringToMb(System.getenv("SPARK_MEM")) - // TODO: Might need to add some extra memory for the non-heap parts of the JVM - } else { - 512 - } + // TODO: Might need to add some extra memory for the non-heap parts of the JVM + Option(System.getProperty("spark.executor.memory")) + .orElse(Option(System.getenv("SPARK_MEM"))) + .map(Utils.memoryStringToMb) + .getOrElse(512) } override def start() { From 4078623b9f2a338d4992c3dfd3af3a5550615180 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Tue, 15 Jan 2013 12:05:54 -0800 Subject: [PATCH 105/291] Remove broken attempt to test fetching case. 
--- .../scala/spark/MapOutputTrackerSuite.scala | 48 +------------------ 1 file changed, 2 insertions(+), 46 deletions(-) diff --git a/core/src/test/scala/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/spark/MapOutputTrackerSuite.scala index aa1d8ac7e6..d3dd3a8fa4 100644 --- a/core/src/test/scala/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/spark/MapOutputTrackerSuite.scala @@ -105,52 +105,8 @@ class MapOutputTrackerSuite extends FunSuite with BeforeAndAfter { masterTracker.incrementGeneration() slaveTracker.updateGeneration(masterTracker.getGeneration) intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } - } - test("simulatenous fetch fails") { - System.clearProperty("spark.master.host") - val dummyActorSystem = ActorSystem("testDummy") - val dummyTracker = new MapOutputTracker(dummyActorSystem, true) - dummyTracker.registerShuffle(10, 1) - // val compressedSize1000 = MapOutputTracker.compressSize(1000L) - // val size100 = MapOutputTracker.decompressSize(compressedSize1000) - // dummyTracker.registerMapOutput(10, 0, new MapStatus( - // new BlockManagerId("hostA", 1000), Array(compressedSize1000))) - val serializedMessage = dummyTracker.getSerializedLocations(10) - - val (actorSystem, boundPort) = - AkkaUtils.createActorSystem("test", "localhost", 0) - System.setProperty("spark.master.port", boundPort.toString) - val delayResponseLock = new java.lang.Object - val delayResponseActor = actorSystem.actorOf(Props(new Actor { - override def receive = { - case GetMapOutputStatuses(shuffleId: Int, requester: String) => - delayResponseLock.synchronized { - sender ! serializedMessage - } - } - }), name = "MapOutputTracker") - val slaveTracker = new MapOutputTracker(actorSystem, false) - var firstFailed = false - var secondFailed = false - val firstFetch = new Thread { - override def run() { - intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } - firstFailed = true - } - } - val secondFetch = new Thread { - override def run() { - intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } - secondFailed = true - } - } - delayResponseLock.synchronized { - firstFetch.start - secondFetch.start - } - firstFetch.join - secondFetch.join - assert(firstFailed && secondFailed) + // failure should be cached + intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } } } From a805ac4a7cdd520b6141dd885c780c526bb54ba6 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Wed, 16 Jan 2013 10:55:26 -0800 Subject: [PATCH 106/291] Disabled checkpoint for PairwiseRDD (pySpark). 
--- core/src/main/scala/spark/api/python/PythonRDD.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 276035a9ad..0138b22d38 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -138,6 +138,7 @@ private class PairwiseRDD(prev: RDD[Array[Byte]]) extends case Seq(a, b) => (a, b) case x => throw new Exception("PairwiseRDD: unexpected value: " + x) } + override def checkpoint() { } val asJavaPairRDD : JavaPairRDD[Array[Byte], Array[Byte]] = JavaPairRDD.fromRDD(this) } From eae698f755f41fd8bdff94c498df314ed74aa3c1 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 16 Jan 2013 12:21:37 -0800 Subject: [PATCH 107/291] remove unused thread pool --- .../main/scala/spark/executor/StandaloneExecutorBackend.scala | 3 --- 1 file changed, 3 deletions(-) diff --git a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala index 915f71ba9f..a29bf974d2 100644 --- a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala +++ b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala @@ -24,9 +24,6 @@ private[spark] class StandaloneExecutorBackend( with ExecutorBackend with Logging { - val threadPool = new ThreadPoolExecutor( - 1, 128, 600, TimeUnit.SECONDS, new SynchronousQueue[Runnable]) - var master: ActorRef = null override def preStart() { From 42fbef3c2a6460bcd389bb86306be3ebc14c998b Mon Sep 17 00:00:00 2001 From: Nick Pentreath Date: Thu, 17 Jan 2013 15:54:59 +0200 Subject: [PATCH 108/291] Adding default command line args to SparkALS --- .../main/scala/spark/examples/SparkALS.scala | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/examples/src/main/scala/spark/examples/SparkALS.scala b/examples/src/main/scala/spark/examples/SparkALS.scala index fb28e2c932..cbd749666d 100644 --- a/examples/src/main/scala/spark/examples/SparkALS.scala +++ b/examples/src/main/scala/spark/examples/SparkALS.scala @@ -7,6 +7,7 @@ import cern.jet.math._ import cern.colt.matrix._ import cern.colt.matrix.linalg._ import spark._ +import scala.Option object SparkALS { // Parameters set through command line arguments @@ -97,21 +98,27 @@ object SparkALS { def main(args: Array[String]) { var host = "" var slices = 0 - args match { - case Array(m, u, f, iters, slices_, host_) => { - M = m.toInt - U = u.toInt - F = f.toInt - ITERATIONS = iters.toInt - slices = slices_.toInt - host = host_ + + (1 to 6).map(i => { + i match { + case a if a < args.length => Option(args(a)) + case _ => Option(null) + } + }).toArray match { + case Array(host_, m, u, f, iters, slices_) => { + host = host_ getOrElse "local" + M = (m getOrElse "100").toInt + U = (u getOrElse "500").toInt + F = (f getOrElse "10").toInt + ITERATIONS = (iters getOrElse "5").toInt + slices = (slices_ getOrElse "2").toInt } case _ => { - System.err.println("Usage: SparkALS ") + System.err.println("Usage: SparkALS [ ]") System.exit(1) } } - printf("Running with M=%d, U=%d, F=%d, iters=%d\n", M, U, F, ITERATIONS); + printf("Running with M=%d, U=%d, F=%d, iters=%d\n", M, U, F, ITERATIONS) val spark = new SparkContext(host, "SparkALS") val R = generateR() From a512df551f85086a6ec363744542e74749c6b560 Mon Sep 17 00:00:00 2001 From: Nick Pentreath Date: Thu, 17 Jan 2013 16:05:27 +0200 Subject: [PATCH 109/291] Fixed index error missing first argument --- 
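The SparkALS patch above maps each positional argument to an Option and falls back to a default when it is absent; the one-line diff that follows corrects the indexing from (1 to 6) to (0 to 5) so the first argument is no longer skipped. A standalone sketch of the corrected pattern, with hypothetical defaults:

object AlsArgs {
  def main(args: Array[String]) {
    val opts = (0 to 5).map(i => if (i < args.length) Some(args(i)) else None)
    val host  = opts(0).getOrElse("local")
    val m     = opts(1).getOrElse("100").toInt
    val u     = opts(2).getOrElse("500").toInt
    val iters = opts(3).getOrElse("5").toInt
    println("host=" + host + ", M=" + m + ", U=" + u + ", iterations=" + iters)
  }
}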
examples/src/main/scala/spark/examples/SparkALS.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/src/main/scala/spark/examples/SparkALS.scala b/examples/src/main/scala/spark/examples/SparkALS.scala index cbd749666d..4672812565 100644 --- a/examples/src/main/scala/spark/examples/SparkALS.scala +++ b/examples/src/main/scala/spark/examples/SparkALS.scala @@ -99,7 +99,7 @@ object SparkALS { var host = "" var slices = 0 - (1 to 6).map(i => { + (0 to 5).map(i => { i match { case a if a < args.length => Option(args(a)) case _ => Option(null) From a5ba7a9f322dce763350864bf89d94e6656d9984 Mon Sep 17 00:00:00 2001 From: Nick Pentreath Date: Thu, 17 Jan 2013 16:21:00 +0200 Subject: [PATCH 110/291] Use only one update function and pass in transpose of ratings matrix where appropriate --- .../main/scala/spark/examples/SparkALS.scala | 32 ++----------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/examples/src/main/scala/spark/examples/SparkALS.scala b/examples/src/main/scala/spark/examples/SparkALS.scala index 4672812565..2766ad1702 100644 --- a/examples/src/main/scala/spark/examples/SparkALS.scala +++ b/examples/src/main/scala/spark/examples/SparkALS.scala @@ -43,7 +43,7 @@ object SparkALS { return sqrt(sumSqs / (M * U)) } - def updateMovie(i: Int, m: DoubleMatrix1D, us: Array[DoubleMatrix1D], + def update(i: Int, m: DoubleMatrix1D, us: Array[DoubleMatrix1D], R: DoubleMatrix2D) : DoubleMatrix1D = { val U = us.size @@ -69,32 +69,6 @@ object SparkALS { return solved2D.viewColumn(0) } - def updateUser(j: Int, u: DoubleMatrix1D, ms: Array[DoubleMatrix1D], - R: DoubleMatrix2D) : DoubleMatrix1D = - { - val M = ms.size - val F = ms(0).size - val XtX = factory2D.make(F, F) - val Xty = factory1D.make(F) - // For each movie that the user rated - for (i <- 0 until M) { - val m = ms(i) - // Add m * m^t to XtX - blas.dger(1, m, m, XtX) - // Add m * rating to Xty - blas.daxpy(R.get(i, j), m, Xty) - } - // Add regularization coefs to diagonal terms - for (d <- 0 until F) { - XtX.set(d, d, XtX.get(d, d) + LAMBDA * M) - } - // Solve it with Cholesky - val ch = new CholeskyDecomposition(XtX) - val Xty2D = factory2D.make(Xty.toArray, F) - val solved2D = ch.solve(Xty2D) - return solved2D.viewColumn(0) - } - def main(args: Array[String]) { var host = "" var slices = 0 @@ -134,11 +108,11 @@ object SparkALS { for (iter <- 1 to ITERATIONS) { println("Iteration " + iter + ":") ms = spark.parallelize(0 until M, slices) - .map(i => updateMovie(i, msc.value(i), usc.value, Rc.value)) + .map(i => update(i, msc.value(i), usc.value, Rc.value)) .toArray msc = spark.broadcast(ms) // Re-broadcast ms because it was updated us = spark.parallelize(0 until U, slices) - .map(i => updateUser(i, usc.value(i), msc.value, Rc.value)) + .map(i => update(i, usc.value(i), msc.value, algebra.transpose(Rc.value))) .toArray usc = spark.broadcast(us) // Re-broadcast us because it was updated println("RMSE = " + rmse(R, ms, us)) From 892c32a14b89139b7bd89e141fc90b148a67ce68 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Thu, 17 Jan 2013 11:14:47 -0800 Subject: [PATCH 111/291] Warn users if they run pyspark or spark-shell without compiling Spark --- pyspark | 7 +++++++ run | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/pyspark b/pyspark index 9e89d51ba2..ab7f4f50c0 100755 --- a/pyspark +++ b/pyspark @@ -6,6 +6,13 @@ FWDIR="$(cd `dirname $0`; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" +# Exit if the user hasn't compiled Spark +if [ ! 
-e "$SPARK_HOME/repl/target" ]; then + echo "Failed to find Spark classes in $SPARK_HOME/repl/target" >&2 + echo "You need to compile Spark before running this program" >&2 + exit 1 +fi + # Load environment variables from conf/spark-env.sh, if it exists if [ -e $FWDIR/conf/spark-env.sh ] ; then . $FWDIR/conf/spark-env.sh diff --git a/run b/run index ca23455386..eb93db66db 100755 --- a/run +++ b/run @@ -65,6 +65,13 @@ EXAMPLES_DIR="$FWDIR/examples" BAGEL_DIR="$FWDIR/bagel" PYSPARK_DIR="$FWDIR/python" +# Exit if the user hasn't compiled Spark +if [ ! -e "$REPL_DIR/target" ]; then + echo "Failed to find Spark classes in $REPL_DIR/target" >&2 + echo "You need to compile Spark before running this program" >&2 + exit 1 +fi + # Build up classpath CLASSPATH="$SPARK_CLASSPATH" CLASSPATH+=":$FWDIR/conf" From 742bc841adb2a57b05e7a155681a162ab9dfa2c1 Mon Sep 17 00:00:00 2001 From: Fernand Pajot Date: Thu, 17 Jan 2013 16:56:11 -0800 Subject: [PATCH 112/291] changed HttpBroadcast server cache to be in spark.local.dir instead of java.io.tmpdir --- core/src/main/scala/spark/broadcast/HttpBroadcast.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/broadcast/HttpBroadcast.scala b/core/src/main/scala/spark/broadcast/HttpBroadcast.scala index 7eb4ddb74f..96dc28f12a 100644 --- a/core/src/main/scala/spark/broadcast/HttpBroadcast.scala +++ b/core/src/main/scala/spark/broadcast/HttpBroadcast.scala @@ -89,7 +89,7 @@ private object HttpBroadcast extends Logging { } private def createServer() { - broadcastDir = Utils.createTempDir() + broadcastDir = Utils.createTempDir(System.getProperty("spark.local.dir", System.getProperty("java.io.tmpdir"))) server = new HttpServer(broadcastDir) server.start() serverUri = server.uri From 54c0f9f185576e9b844fa8f81ca410f188daa51c Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Thu, 17 Jan 2013 17:40:55 -0800 Subject: [PATCH 113/291] Fix code that assumed spark.local.dir is only a single directory --- core/src/main/scala/spark/Utils.scala | 11 ++++++++++- .../main/scala/spark/broadcast/HttpBroadcast.scala | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/Utils.scala b/core/src/main/scala/spark/Utils.scala index 0e7007459d..aeed5d2f32 100644 --- a/core/src/main/scala/spark/Utils.scala +++ b/core/src/main/scala/spark/Utils.scala @@ -134,7 +134,7 @@ private object Utils extends Logging { */ def fetchFile(url: String, targetDir: File) { val filename = url.split("/").last - val tempDir = System.getProperty("spark.local.dir", System.getProperty("java.io.tmpdir")) + val tempDir = getLocalDir val tempFile = File.createTempFile("fetchFileTemp", null, new File(tempDir)) val targetFile = new File(targetDir, filename) val uri = new URI(url) @@ -204,6 +204,15 @@ private object Utils extends Logging { FileUtil.chmod(filename, "a+x") } + /** + * Get a temporary directory using Spark's spark.local.dir property, if set. This will always + * return a single directory, even though the spark.local.dir property might be a list of + * multiple paths. + */ + def getLocalDir: String = { + System.getProperty("spark.local.dir", System.getProperty("java.io.tmpdir")).split(',')(0) + } + /** * Shuffle the elements of a collection into a random order, returning the * result in a new collection. 
Unlike scala.util.Random.shuffle, this method diff --git a/core/src/main/scala/spark/broadcast/HttpBroadcast.scala b/core/src/main/scala/spark/broadcast/HttpBroadcast.scala index 96dc28f12a..856a4683a9 100644 --- a/core/src/main/scala/spark/broadcast/HttpBroadcast.scala +++ b/core/src/main/scala/spark/broadcast/HttpBroadcast.scala @@ -89,7 +89,7 @@ private object HttpBroadcast extends Logging { } private def createServer() { - broadcastDir = Utils.createTempDir(System.getProperty("spark.local.dir", System.getProperty("java.io.tmpdir"))) + broadcastDir = Utils.createTempDir(Utils.getLocalDir) server = new HttpServer(broadcastDir) server.start() serverUri = server.uri From 1db119a08f07b8707b901e92b03138b27e887844 Mon Sep 17 00:00:00 2001 From: seanm Date: Fri, 18 Jan 2013 20:22:23 -0700 Subject: [PATCH 114/291] kafka jar wasn't being included by run script --- run | 3 +++ 1 file changed, 3 insertions(+) diff --git a/run b/run index 2f61cb2a87..494f04c3ac 100755 --- a/run +++ b/run @@ -76,6 +76,9 @@ CLASSPATH+=":$CORE_DIR/src/main/resources" CLASSPATH+=":$REPL_DIR/target/scala-$SCALA_VERSION/classes" CLASSPATH+=":$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes" CLASSPATH+=":$STREAMING_DIR/target/scala-$SCALA_VERSION/classes" +for jar in `find "$STREAMING_DIR/lib" -name '*jar'`; do + CLASSPATH+=":$jar" +done if [ -e "$FWDIR/lib_managed" ]; then for jar in `find "$FWDIR/lib_managed/jars" -name '*jar'`; do CLASSPATH+=":$jar" From 56b7fbafa2b7717896c613e39ecc134f2405b4c6 Mon Sep 17 00:00:00 2001 From: seanm Date: Fri, 18 Jan 2013 21:15:54 -0700 Subject: [PATCH 115/291] further KafkaInputDStream cleanup (removing unused and commented out code relating to offset management) --- .../streaming/dstream/KafkaInputDStream.scala | 72 +------------------ 1 file changed, 3 insertions(+), 69 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala index 9605072382..533c91ee95 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala @@ -19,15 +19,6 @@ import scala.collection.JavaConversions._ // Key for a specific Kafka Partition: (broker, topic, group, part) case class KafkaPartitionKey(brokerId: Int, topic: String, groupId: String, partId: Int) -// NOT USED - Originally intended for fault-tolerance -// Metadata for a Kafka Stream that it sent to the Master -private[streaming] -case class KafkaInputDStreamMetadata(timestamp: Long, data: Map[KafkaPartitionKey, Long]) -// NOT USED - Originally intended for fault-tolerance -// Checkpoint data specific to a KafkaInputDstream -private[streaming] -case class KafkaDStreamCheckpointData(kafkaRdds: HashMap[Time, Any], - savedOffsets: Map[KafkaPartitionKey, Long]) extends DStreamCheckpointData(kafkaRdds) /** * Input stream that pulls messages from a Kafka Broker. @@ -52,49 +43,6 @@ class KafkaInputDStream[T: ClassManifest]( storageLevel: StorageLevel ) extends NetworkInputDStream[T](ssc_ ) with Logging { - // Metadata that keeps track of which messages have already been consumed. - var savedOffsets = HashMap[Long, Map[KafkaPartitionKey, Long]]() - - /* NOT USED - Originally intended for fault-tolerance - - // In case of a failure, the offets for a particular timestamp will be restored. 
- @transient var restoredOffsets : Map[KafkaPartitionKey, Long] = null - - - override protected[streaming] def addMetadata(metadata: Any) { - metadata match { - case x : KafkaInputDStreamMetadata => - savedOffsets(x.timestamp) = x.data - // TOOD: Remove logging - logInfo("New saved Offsets: " + savedOffsets) - case _ => logInfo("Received unknown metadata: " + metadata.toString) - } - } - - override protected[streaming] def updateCheckpointData(currentTime: Time) { - super.updateCheckpointData(currentTime) - if(savedOffsets.size > 0) { - // Find the offets that were stored before the checkpoint was initiated - val key = savedOffsets.keys.toList.sortWith(_ < _).filter(_ < currentTime.millis).last - val latestOffsets = savedOffsets(key) - logInfo("Updating KafkaDStream checkpoint data: " + latestOffsets.toString) - checkpointData = KafkaDStreamCheckpointData(checkpointData.rdds, latestOffsets) - // TODO: This may throw out offsets that are created after the checkpoint, - // but it's unlikely we'll need them. - savedOffsets.clear() - } - } - - override protected[streaming] def restoreCheckpointData() { - super.restoreCheckpointData() - logInfo("Restoring KafkaDStream checkpoint data.") - checkpointData match { - case x : KafkaDStreamCheckpointData => - restoredOffsets = x.savedOffsets - logInfo("Restored KafkaDStream offsets: " + savedOffsets) - } - } */ - def createReceiver(): NetworkReceiver[T] = { new KafkaReceiver(host, port, groupId, topics, initialOffsets, storageLevel) .asInstanceOf[NetworkReceiver[T]] @@ -111,8 +59,6 @@ class KafkaReceiver(host: String, port: Int, groupId: String, // Handles pushing data into the BlockManager lazy protected val blockGenerator = new BlockGenerator(storageLevel) - // Keeps track of the current offsets. Maps from (broker, topic, group, part) -> Offset - lazy val offsets = HashMap[KafkaPartitionKey, Long]() // Connection to Kafka var consumerConnector : ZookeeperConsumerConnector = null @@ -143,8 +89,8 @@ class KafkaReceiver(host: String, port: Int, groupId: String, consumerConnector = Consumer.create(consumerConfig).asInstanceOf[ZookeeperConsumerConnector] logInfo("Connected to " + zooKeeperEndPoint) - // Reset the Kafka offsets in case we are recovering from a failure - resetOffsets(initialOffsets) + // If specified, set the topic offset + setOffsets(initialOffsets) // Create Threads for each Topic/Message Stream we are listening val topicMessageStreams = consumerConnector.createMessageStreams(topics, new StringDecoder()) @@ -157,7 +103,7 @@ class KafkaReceiver(host: String, port: Int, groupId: String, } // Overwrites the offets in Zookeper. 
- private def resetOffsets(offsets: Map[KafkaPartitionKey, Long]) { + private def setOffsets(offsets: Map[KafkaPartitionKey, Long]) { offsets.foreach { case(key, offset) => val topicDirs = new ZKGroupTopicDirs(key.groupId, key.topic) val partitionName = key.brokerId + "-" + key.partId @@ -178,16 +124,4 @@ class KafkaReceiver(host: String, port: Int, groupId: String, } } } - - // NOT USED - Originally intended for fault-tolerance - // class KafkaDataHandler(receiver: KafkaReceiver, storageLevel: StorageLevel) - // extends BufferingBlockCreator[Any](receiver, storageLevel) { - - // override def createBlock(blockId: String, iterator: Iterator[Any]) : Block = { - // // Creates a new Block with Kafka-specific Metadata - // new Block(blockId, iterator, KafkaInputDStreamMetadata(System.currentTimeMillis, offsets.toMap)) - // } - - // } - } From d3064fe70762cbfcb7dbd5e1fbd708539c3de5e9 Mon Sep 17 00:00:00 2001 From: seanm Date: Fri, 18 Jan 2013 21:34:29 -0700 Subject: [PATCH 116/291] kafkaStream API cleanup. A quorum of zookeepers can now be specified --- .../streaming/examples/KafkaWordCount.scala | 16 ++++++++-------- .../spark/streaming/StreamingContext.scala | 8 +++----- .../streaming/dstream/KafkaInputDStream.scala | 17 +++++++---------- 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/examples/src/main/scala/spark/streaming/examples/KafkaWordCount.scala b/examples/src/main/scala/spark/streaming/examples/KafkaWordCount.scala index fe55db6e2c..65d5da82fc 100644 --- a/examples/src/main/scala/spark/streaming/examples/KafkaWordCount.scala +++ b/examples/src/main/scala/spark/streaming/examples/KafkaWordCount.scala @@ -13,19 +13,19 @@ import spark.streaming.util.RawTextHelper._ object KafkaWordCount { def main(args: Array[String]) { - if (args.length < 6) { - System.err.println("Usage: KafkaWordCount ") + if (args.length < 5) { + System.err.println("Usage: KafkaWordCount ") System.exit(1) } - val Array(master, hostname, port, group, topics, numThreads) = args + val Array(master, zkQuorum, group, topics, numThreads) = args val sc = new SparkContext(master, "KafkaWordCount") val ssc = new StreamingContext(sc, Seconds(2)) ssc.checkpoint("checkpoint") val topicpMap = topics.split(",").map((_,numThreads.toInt)).toMap - val lines = ssc.kafkaStream[String](hostname, port.toInt, group, topicpMap) + val lines = ssc.kafkaStream[String](zkQuorum, group, topicpMap) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1l)).reduceByKeyAndWindow(add _, subtract _, Minutes(10), Seconds(2), 2) wordCounts.print() @@ -38,16 +38,16 @@ object KafkaWordCount { object KafkaWordCountProducer { def main(args: Array[String]) { - if (args.length < 3) { - System.err.println("Usage: KafkaWordCountProducer ") + if (args.length < 2) { + System.err.println("Usage: KafkaWordCountProducer ") System.exit(1) } - val Array(hostname, port, topic, messagesPerSec, wordsPerMessage) = args + val Array(zkQuorum, topic, messagesPerSec, wordsPerMessage) = args // Zookeper connection properties val props = new Properties() - props.put("zk.connect", hostname + ":" + port) + props.put("zk.connect", zkQuorum) props.put("serializer.class", "kafka.serializer.StringEncoder") val config = new ProducerConfig(props) diff --git a/streaming/src/main/scala/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/spark/streaming/StreamingContext.scala index 14500bdcb1..06cf7a06ed 100644 --- a/streaming/src/main/scala/spark/streaming/StreamingContext.scala +++ 
b/streaming/src/main/scala/spark/streaming/StreamingContext.scala @@ -136,8 +136,7 @@ class StreamingContext private ( /** * Create an input stream that pulls messages form a Kafka Broker. - * @param hostname Zookeper hostname. - * @param port Zookeper port. + * @param zkQuorum Zookeper quorum (hostname:port,hostname:port,..). * @param groupId The group id for this consumer. * @param topics Map of (topic_name -> numPartitions) to consume. Each partition is consumed * in its own thread. @@ -146,14 +145,13 @@ class StreamingContext private ( * @param storageLevel RDD storage level. Defaults to memory-only. */ def kafkaStream[T: ClassManifest]( - hostname: String, - port: Int, + zkQuorum: String, groupId: String, topics: Map[String, Int], initialOffsets: Map[KafkaPartitionKey, Long] = Map[KafkaPartitionKey, Long](), storageLevel: StorageLevel = StorageLevel.MEMORY_ONLY_SER_2 ): DStream[T] = { - val inputStream = new KafkaInputDStream[T](this, hostname, port, groupId, topics, initialOffsets, storageLevel) + val inputStream = new KafkaInputDStream[T](this, zkQuorum, groupId, topics, initialOffsets, storageLevel) registerInputStream(inputStream) inputStream } diff --git a/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala index 533c91ee95..4f8c8b9d10 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala @@ -23,8 +23,7 @@ case class KafkaPartitionKey(brokerId: Int, topic: String, groupId: String, part /** * Input stream that pulls messages from a Kafka Broker. * - * @param host Zookeper hostname. - * @param port Zookeper port. + * @param zkQuorum Zookeper quorum (hostname:port,hostname:port,..). * @param groupId The group id for this consumer. * @param topics Map of (topic_name -> numPartitions) to consume. Each partition is consumed * in its own thread. 
@@ -35,8 +34,7 @@ case class KafkaPartitionKey(brokerId: Int, topic: String, groupId: String, part private[streaming] class KafkaInputDStream[T: ClassManifest]( @transient ssc_ : StreamingContext, - host: String, - port: Int, + zkQuorum: String, groupId: String, topics: Map[String, Int], initialOffsets: Map[KafkaPartitionKey, Long], @@ -44,13 +42,13 @@ class KafkaInputDStream[T: ClassManifest]( ) extends NetworkInputDStream[T](ssc_ ) with Logging { def createReceiver(): NetworkReceiver[T] = { - new KafkaReceiver(host, port, groupId, topics, initialOffsets, storageLevel) + new KafkaReceiver(zkQuorum, groupId, topics, initialOffsets, storageLevel) .asInstanceOf[NetworkReceiver[T]] } } private[streaming] -class KafkaReceiver(host: String, port: Int, groupId: String, +class KafkaReceiver(zkQuorum: String, groupId: String, topics: Map[String, Int], initialOffsets: Map[KafkaPartitionKey, Long], storageLevel: StorageLevel) extends NetworkReceiver[Any] { @@ -73,21 +71,20 @@ class KafkaReceiver(host: String, port: Int, groupId: String, // In case we are using multiple Threads to handle Kafka Messages val executorPool = Executors.newFixedThreadPool(topics.values.reduce(_ + _)) - val zooKeeperEndPoint = host + ":" + port logInfo("Starting Kafka Consumer Stream with group: " + groupId) logInfo("Initial offsets: " + initialOffsets.toString) // Zookeper connection properties val props = new Properties() - props.put("zk.connect", zooKeeperEndPoint) + props.put("zk.connect", zkQuorum) props.put("zk.connectiontimeout.ms", ZK_TIMEOUT.toString) props.put("groupid", groupId) // Create the connection to the cluster - logInfo("Connecting to Zookeper: " + zooKeeperEndPoint) + logInfo("Connecting to Zookeper: " + zkQuorum) val consumerConfig = new ConsumerConfig(props) consumerConnector = Consumer.create(consumerConfig).asInstanceOf[ZookeeperConsumerConnector] - logInfo("Connected to " + zooKeeperEndPoint) + logInfo("Connected to " + zkQuorum) // If specified, set the topic offset setOffsets(initialOffsets) From ecdff861f7993251163b82e737aba6bb1bb814d8 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Sat, 19 Jan 2013 22:59:35 -0800 Subject: [PATCH 117/291] Clarifying log directory in EC2 guide --- docs/ec2-scripts.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/ec2-scripts.md b/docs/ec2-scripts.md index 6e1f7fd3b1..8b069ca9ad 100644 --- a/docs/ec2-scripts.md +++ b/docs/ec2-scripts.md @@ -96,7 +96,8 @@ permissions on your private key file, you can run `launch` with the `spark-ec2` to attach a persistent EBS volume to each node for storing the persistent HDFS. - Finally, if you get errors while running your jobs, look at the slave's logs - for that job using the Mesos web UI (`http://:8080`). + for that job inside of the Mesos work directory (/mnt/mesos-work). Mesos errors + can be found using the Mesos web UI (`http://:8080`). # Configuration From 214345ceace634ec9cc83c4c85b233b699e0d219 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Sat, 19 Jan 2013 23:50:17 -0800 Subject: [PATCH 118/291] Fixed issue https://spark-project.atlassian.net/browse/STREAMING-29, along with updates to doc comments in SparkContext.checkpoint(). 
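The contract this patch introduces is easiest to see end to end: a checkpoint directory must be set on the SparkContext before checkpoint() is called, checkpoint() must be marked before the first job runs on the RDD, and the RDD should be persisted so writing the checkpoint does not force a recomputation. The sketch below is illustrative only: it uses the PySpark method names (setCheckpointDir, cache, checkpoint) added later in this series rather than the Scala API touched here, and the checkpoint path is an assumed placeholder.

    from pyspark.context import SparkContext

    sc = SparkContext("local", "checkpoint-demo")
    # Assumed placeholder path; it must not already exist unless useExisting=True is
    # passed. After this patch, calling checkpoint() without setting a directory raises
    # "Checkpoint directory has not been set in the SparkContext".
    sc.setCheckpointDir("/tmp/spark-checkpoints")

    rdd = sc.parallelize(range(100)).map(lambda x: x * x)
    rdd.cache()       # recommended: keep it in memory so the checkpoint write avoids recomputation
    rdd.checkpoint()  # must be called before any job has been executed on this RDD
    rdd.count()       # the first job materializes the RDD and saves it under the checkpoint directory

Making checkpointDir an Option[String] rather than a nullable String is what lets checkpoint() fail fast with the message above instead of failing later when the path is resolved.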
--- core/src/main/scala/spark/RDD.scala | 17 ++++++++--------- .../main/scala/spark/RDDCheckpointData.scala | 2 +- core/src/main/scala/spark/SparkContext.scala | 13 +++++++------ .../main/scala/spark/streaming/DStream.scala | 8 +++++++- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index a9f2e86455..e0d2eabb1d 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -549,17 +549,16 @@ abstract class RDD[T: ClassManifest]( } /** - * Mark this RDD for checkpointing. The RDD will be saved to a file inside `checkpointDir` - * (set using setCheckpointDir()) and all references to its parent RDDs will be removed. - * This is used to truncate very long lineages. In the current implementation, Spark will save - * this RDD to a file (using saveAsObjectFile()) after the first job using this RDD is done. - * Hence, it is strongly recommended to use checkpoint() on RDDs when - * (i) checkpoint() is called before the any job has been executed on this RDD. - * (ii) This RDD has been made to persist in memory. Otherwise saving it on a file will - * require recomputation. + * Mark this RDD for checkpointing. It will be saved to a file inside the checkpoint + * directory set with SparkContext.setCheckpointDir() and all references to its parent + * RDDs will be removed. This function must be called before any job has been + * executed on this RDD. It is strongly recommended that this RDD is persisted in + * memory, otherwise saving it on a file will require recomputation. */ def checkpoint() { - if (checkpointData.isEmpty) { + if (context.checkpointDir.isEmpty) { + throw new Exception("Checkpoint directory has not been set in the SparkContext") + } else if (checkpointData.isEmpty) { checkpointData = Some(new RDDCheckpointData(this)) checkpointData.get.markForCheckpoint() } diff --git a/core/src/main/scala/spark/RDDCheckpointData.scala b/core/src/main/scala/spark/RDDCheckpointData.scala index d845a522e4..18df530b7d 100644 --- a/core/src/main/scala/spark/RDDCheckpointData.scala +++ b/core/src/main/scala/spark/RDDCheckpointData.scala @@ -63,7 +63,7 @@ extends Logging with Serializable { } // Save to file, and reload it as an RDD - val path = new Path(rdd.context.checkpointDir, "rdd-" + rdd.id).toString + val path = new Path(rdd.context.checkpointDir.get, "rdd-" + rdd.id).toString rdd.context.runJob(rdd, CheckpointRDD.writeToFile(path) _) val newRDD = new CheckpointRDD[T](rdd.context, path) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 88cf357ebf..7f3259d982 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -184,7 +184,7 @@ class SparkContext( private var dagScheduler = new DAGScheduler(taskScheduler) - private[spark] var checkpointDir: String = null + private[spark] var checkpointDir: Option[String] = None // Methods for creating RDDs @@ -595,10 +595,11 @@ class SparkContext( } /** - * Set the directory under which RDDs are going to be checkpointed. This method will - * create this directory and will throw an exception of the path already exists (to avoid - * overwriting existing files may be overwritten). The directory will be deleted on exit - * if indicated. + * Set the directory under which RDDs are going to be checkpointed. The directory must + * be a HDFS path if running on a cluster. If the directory does not exist, it will + * be created. 
If the directory exists and useExisting is set to true, then the + * exisiting directory will be used. Otherwise an exception will be thrown to + * prevent accidental overriding of checkpoint files in the existing directory. */ def setCheckpointDir(dir: String, useExisting: Boolean = false) { val path = new Path(dir) @@ -610,7 +611,7 @@ class SparkContext( fs.mkdirs(path) } } - checkpointDir = dir + checkpointDir = Some(dir) } /** Default level of parallelism to use when not given by user (e.g. for reduce tasks) */ diff --git a/streaming/src/main/scala/spark/streaming/DStream.scala b/streaming/src/main/scala/spark/streaming/DStream.scala index fbe3cebd6d..c4442b6a0c 100644 --- a/streaming/src/main/scala/spark/streaming/DStream.scala +++ b/streaming/src/main/scala/spark/streaming/DStream.scala @@ -154,10 +154,16 @@ abstract class DStream[T: ClassManifest] ( assert( !mustCheckpoint || checkpointDuration != null, - "The checkpoint interval for " + this.getClass.getSimpleName + " has not been set. " + + "The checkpoint interval for " + this.getClass.getSimpleName + " has not been set." + " Please use DStream.checkpoint() to set the interval." ) + assert( + checkpointDuration == null || ssc.sc.checkpointDir.isDefined, + "The checkpoint directory has not been set. Please use StreamingContext.checkpoint()" + + " or SparkContext.checkpoint() to set the checkpoint directory." + ) + assert( checkpointDuration == null || checkpointDuration >= slideDuration, "The checkpoint interval for " + this.getClass.getSimpleName + " has been set to " + From 8e7f098a2c9e5e85cb9435f28d53a3a5847c14aa Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 20 Jan 2013 01:57:44 -0800 Subject: [PATCH 119/291] Added accumulators to PySpark --- .../scala/spark/api/python/PythonRDD.scala | 83 +++++++-- python/pyspark/__init__.py | 4 + python/pyspark/accumulators.py | 166 ++++++++++++++++++ python/pyspark/context.py | 38 ++++ python/pyspark/rdd.py | 2 +- python/pyspark/serializers.py | 7 +- python/pyspark/shell.py | 4 +- python/pyspark/worker.py | 7 +- 8 files changed, 290 insertions(+), 21 deletions(-) create mode 100644 python/pyspark/accumulators.py diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index f431ef28d3..fb13e84658 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -1,7 +1,8 @@ package spark.api.python import java.io._ -import java.util.{List => JList} +import java.net._ +import java.util.{List => JList, ArrayList => JArrayList, Collections} import scala.collection.JavaConversions._ import scala.io.Source @@ -10,25 +11,26 @@ import spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} import spark.broadcast.Broadcast import spark._ import spark.rdd.PipedRDD -import java.util private[spark] class PythonRDD[T: ClassManifest]( - parent: RDD[T], - command: Seq[String], - envVars: java.util.Map[String, String], - preservePartitoning: Boolean, - pythonExec: String, - broadcastVars: java.util.List[Broadcast[Array[Byte]]]) + parent: RDD[T], + command: Seq[String], + envVars: java.util.Map[String, String], + preservePartitoning: Boolean, + pythonExec: String, + broadcastVars: JList[Broadcast[Array[Byte]]], + accumulator: Accumulator[JList[Array[Byte]]]) extends RDD[Array[Byte]](parent.context) { // Similar to Runtime.exec(), if we are given a single string, split it into words // using a standard StringTokenizer (i.e. 
by spaces) def this(parent: RDD[T], command: String, envVars: java.util.Map[String, String], - preservePartitoning: Boolean, pythonExec: String, - broadcastVars: java.util.List[Broadcast[Array[Byte]]]) = + preservePartitoning: Boolean, pythonExec: String, + broadcastVars: JList[Broadcast[Array[Byte]]], + accumulator: Accumulator[JList[Array[Byte]]]) = this(parent, PipedRDD.tokenize(command), envVars, preservePartitoning, pythonExec, - broadcastVars) + broadcastVars, accumulator) override def splits = parent.splits @@ -93,18 +95,30 @@ private[spark] class PythonRDD[T: ClassManifest]( // Return an iterator that read lines from the process's stdout val stream = new DataInputStream(proc.getInputStream) return new Iterator[Array[Byte]] { - def next() = { + def next(): Array[Byte] = { val obj = _nextObj _nextObj = read() obj } - private def read() = { + private def read(): Array[Byte] = { try { val length = stream.readInt() - val obj = new Array[Byte](length) - stream.readFully(obj) - obj + if (length != -1) { + val obj = new Array[Byte](length) + stream.readFully(obj) + obj + } else { + // We've finished the data section of the output, but we can still read some + // accumulator updates; let's do that, breaking when we get EOFException + while (true) { + val len2 = stream.readInt() + val update = new Array[Byte](len2) + stream.readFully(update) + accumulator += Collections.singletonList(update) + } + new Array[Byte](0) + } } catch { case eof: EOFException => { val exitStatus = proc.waitFor() @@ -246,3 +260,40 @@ private class ExtractValue extends spark.api.java.function.Function[(Array[Byte] private class BytesToString extends spark.api.java.function.Function[Array[Byte], String] { override def call(arr: Array[Byte]) : String = new String(arr, "UTF-8") } + +/** + * Internal class that acts as an `AccumulatorParam` for Python accumulators. Inside, it + * collects a list of pickled strings that we pass to Python through a socket. + */ +class PythonAccumulatorParam(@transient serverHost: String, serverPort: Int) + extends AccumulatorParam[JList[Array[Byte]]] { + + override def zero(value: JList[Array[Byte]]): JList[Array[Byte]] = new JArrayList + + override def addInPlace(val1: JList[Array[Byte]], val2: JList[Array[Byte]]) + : JList[Array[Byte]] = { + if (serverHost == null) { + // This happens on the worker node, where we just want to remember all the updates + val1.addAll(val2) + val1 + } else { + // This happens on the master, where we pass the updates to Python through a socket + val socket = new Socket(serverHost, serverPort) + val in = socket.getInputStream + val out = new DataOutputStream(socket.getOutputStream) + out.writeInt(val2.size) + for (array <- val2) { + out.writeInt(array.length) + out.write(array) + } + out.flush() + // Wait for a byte from the Python side as an acknowledgement + val byteRead = in.read() + if (byteRead == -1) { + throw new SparkException("EOF reached before Python server acknowledged") + } + socket.close() + null + } + } +} diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index c595ae0842..00666bc0a3 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -7,6 +7,10 @@ Public classes: Main entry point for Spark functionality. - L{RDD} A Resilient Distributed Dataset (RDD), the basic abstraction in Spark. + - L{Broadcast} + A broadcast variable that gets reused across tasks. + - L{Accumulator} + An "add-only" shared variable that tasks can only add values to. 
""" import sys import os diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py new file mode 100644 index 0000000000..438af4cfc0 --- /dev/null +++ b/python/pyspark/accumulators.py @@ -0,0 +1,166 @@ +""" +>>> from pyspark.context import SparkContext +>>> sc = SparkContext('local', 'test') +>>> a = sc.accumulator(1) +>>> a.value +1 +>>> a.value = 2 +>>> a.value +2 +>>> a += 5 +>>> a.value +7 + +>>> rdd = sc.parallelize([1,2,3]) +>>> def f(x): +... global a +... a += x +>>> rdd.foreach(f) +>>> a.value +13 + +>>> class VectorAccumulatorParam(object): +... def zero(self, value): +... return [0.0] * len(value) +... def addInPlace(self, val1, val2): +... for i in xrange(len(val1)): +... val1[i] += val2[i] +... return val1 +>>> va = sc.accumulator([1.0, 2.0, 3.0], VectorAccumulatorParam()) +>>> va.value +[1.0, 2.0, 3.0] +>>> def g(x): +... global va +... va += [x] * 3 +>>> rdd.foreach(g) +>>> va.value +[7.0, 8.0, 9.0] + +>>> rdd.map(lambda x: a.value).collect() # doctest: +IGNORE_EXCEPTION_DETAIL +Traceback (most recent call last): + ... +Py4JJavaError:... + +>>> def h(x): +... global a +... a.value = 7 +>>> rdd.foreach(h) # doctest: +IGNORE_EXCEPTION_DETAIL +Traceback (most recent call last): + ... +Py4JJavaError:... + +>>> sc.accumulator([1.0, 2.0, 3.0]) # doctest: +IGNORE_EXCEPTION_DETAIL +Traceback (most recent call last): + ... +Exception:... +""" + +import struct +import SocketServer +import threading +from pyspark.cloudpickle import CloudPickler +from pyspark.serializers import read_int, read_with_length, load_pickle + + +# Holds accumulators registered on the current machine, keyed by ID. This is then used to send +# the local accumulator updates back to the driver program at the end of a task. +_accumulatorRegistry = {} + + +def _deserialize_accumulator(aid, zero_value, accum_param): + from pyspark.accumulators import _accumulatorRegistry + accum = Accumulator(aid, zero_value, accum_param) + accum._deserialized = True + _accumulatorRegistry[aid] = accum + return accum + + +class Accumulator(object): + def __init__(self, aid, value, accum_param): + """Create a new Accumulator with a given initial value and AccumulatorParam object""" + from pyspark.accumulators import _accumulatorRegistry + self.aid = aid + self.accum_param = accum_param + self._value = value + self._deserialized = False + _accumulatorRegistry[aid] = self + + def __reduce__(self): + """Custom serialization; saves the zero value from our AccumulatorParam""" + param = self.accum_param + return (_deserialize_accumulator, (self.aid, param.zero(self._value), param)) + + @property + def value(self): + """Get the accumulator's value; only usable in driver program""" + if self._deserialized: + raise Exception("Accumulator.value cannot be accessed inside tasks") + return self._value + + @value.setter + def value(self, value): + """Sets the accumulator's value; only usable in driver program""" + if self._deserialized: + raise Exception("Accumulator.value cannot be accessed inside tasks") + self._value = value + + def __iadd__(self, term): + """The += operator; adds a term to this accumulator's value""" + self._value = self.accum_param.addInPlace(self._value, term) + return self + + def __str__(self): + return str(self._value) + + +class AddingAccumulatorParam(object): + """ + An AccumulatorParam that uses the + operators to add values. Designed for simple types + such as integers, floats, and lists. Requires the zero value for the underlying type + as a parameter. 
+ """ + + def __init__(self, zero_value): + self.zero_value = zero_value + + def zero(self, value): + return self.zero_value + + def addInPlace(self, value1, value2): + value1 += value2 + return value1 + + +# Singleton accumulator params for some standard types +INT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0) +DOUBLE_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0) +COMPLEX_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0j) + + +class _UpdateRequestHandler(SocketServer.StreamRequestHandler): + def handle(self): + from pyspark.accumulators import _accumulatorRegistry + num_updates = read_int(self.rfile) + for _ in range(num_updates): + (aid, update) = load_pickle(read_with_length(self.rfile)) + _accumulatorRegistry[aid] += update + # Write a byte in acknowledgement + self.wfile.write(struct.pack("!b", 1)) + + +def _start_update_server(): + """Start a TCP server to receive accumulator updates in a daemon thread, and returns it""" + server = SocketServer.TCPServer(("localhost", 0), _UpdateRequestHandler) + thread = threading.Thread(target=server.serve_forever) + thread.daemon = True + thread.start() + return server + + +def _test(): + import doctest + doctest.testmod() + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/context.py b/python/pyspark/context.py index e486f206b0..1e2f845f9c 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -2,6 +2,8 @@ import os import atexit from tempfile import NamedTemporaryFile +from pyspark import accumulators +from pyspark.accumulators import Accumulator from pyspark.broadcast import Broadcast from pyspark.java_gateway import launch_gateway from pyspark.serializers import dump_pickle, write_with_length, batched @@ -22,6 +24,7 @@ class SparkContext(object): _readRDDFromPickleFile = jvm.PythonRDD.readRDDFromPickleFile _writeIteratorToPickleFile = jvm.PythonRDD.writeIteratorToPickleFile _takePartition = jvm.PythonRDD.takePartition + _next_accum_id = 0 def __init__(self, master, jobName, sparkHome=None, pyFiles=None, environment=None, batchSize=1024): @@ -52,6 +55,14 @@ class SparkContext(object): self._jsc = self.jvm.JavaSparkContext(master, jobName, sparkHome, empty_string_array) + # Create a single Accumulator in Java that we'll send all our updates through; + # they will be passed back to us through a TCP server + self._accumulatorServer = accumulators._start_update_server() + (host, port) = self._accumulatorServer.server_address + self._javaAccumulator = self._jsc.accumulator( + self.jvm.java.util.ArrayList(), + self.jvm.PythonAccumulatorParam(host, port)) + self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python') # Broadcast's __reduce__ method stores Broadcast instances here. # This allows other code to determine which Broadcast instances have @@ -74,6 +85,8 @@ class SparkContext(object): def __del__(self): if self._jsc: self._jsc.stop() + if self._accumulatorServer: + self._accumulatorServer.shutdown() def stop(self): """ @@ -129,6 +142,31 @@ class SparkContext(object): return Broadcast(jbroadcast.id(), value, jbroadcast, self._pickled_broadcast_vars) + def accumulator(self, value, accum_param=None): + """ + Create an C{Accumulator} with the given initial value, using a given + AccumulatorParam helper object to define how to add values of the data + type if provided. Default AccumulatorParams are used for integers and + floating-point numbers if you do not provide one. 
For other types, the + AccumulatorParam must implement two methods: + - C{zero(value)}: provide a "zero value" for the type, compatible in + dimensions with the provided C{value} (e.g., a zero vector). + - C{addInPlace(val1, val2)}: add two values of the accumulator's data + type, returning a new value; for efficiency, can also update C{val1} + in place and return it. + """ + if accum_param == None: + if isinstance(value, int): + accum_param = accumulators.INT_ACCUMULATOR_PARAM + elif isinstance(value, float): + accum_param = accumulators.FLOAT_ACCUMULATOR_PARAM + elif isinstance(value, complex): + accum_param = accumulators.COMPLEX_ACCUMULATOR_PARAM + else: + raise Exception("No default accumulator param for type %s" % type(value)) + SparkContext._next_accum_id += 1 + return Accumulator(SparkContext._next_accum_id - 1, value, accum_param) + def addFile(self, path): """ Add a file to be downloaded into the working directory of this Spark diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 1d36da42b0..d705f0f9e1 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -703,7 +703,7 @@ class PipelinedRDD(RDD): env = MapConverter().convert(env, self.ctx.gateway._gateway_client) python_rdd = self.ctx.jvm.PythonRDD(self._prev_jrdd.rdd(), pipe_command, env, self.preservesPartitioning, self.ctx.pythonExec, - broadcast_vars, class_manifest) + broadcast_vars, self.ctx._javaAccumulator, class_manifest) self._jrdd_val = python_rdd.asJavaRDD() return self._jrdd_val diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index 9a5151ea00..115cf28cc2 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -52,8 +52,13 @@ def read_int(stream): raise EOFError return struct.unpack("!i", length)[0] + +def write_int(value, stream): + stream.write(struct.pack("!i", value)) + + def write_with_length(obj, stream): - stream.write(struct.pack("!i", len(obj))) + write_int(len(obj), stream) stream.write(obj) diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index 7e6ad3aa76..f6328c561f 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -1,7 +1,7 @@ """ An interactive shell. -This fle is designed to be launched as a PYTHONSTARTUP script. +This file is designed to be launched as a PYTHONSTARTUP script. """ import os from pyspark.context import SparkContext @@ -14,4 +14,4 @@ print "Spark context avaiable as sc." # which allows us to execute the user's PYTHONSTARTUP file: _pythonstartup = os.environ.get('OLD_PYTHONSTARTUP') if _pythonstartup and os.path.isfile(_pythonstartup): - execfile(_pythonstartup) + execfile(_pythonstartup) diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 3d792bbaa2..b2b9288089 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -5,9 +5,10 @@ import sys from base64 import standard_b64decode # CloudPickler needs to be imported so that depicklers are registered using the # copy_reg module. 
+from pyspark.accumulators import _accumulatorRegistry from pyspark.broadcast import Broadcast, _broadcastRegistry from pyspark.cloudpickle import CloudPickler -from pyspark.serializers import write_with_length, read_with_length, \ +from pyspark.serializers import write_with_length, read_with_length, write_int, \ read_long, read_int, dump_pickle, load_pickle, read_from_pickle_file @@ -36,6 +37,10 @@ def main(): iterator = read_from_pickle_file(sys.stdin) for obj in func(split_index, iterator): write_with_length(dumps(obj), old_stdout) + # Mark the beginning of the accumulators section of the output + write_int(-1, old_stdout) + for aid, accum in _accumulatorRegistry.items(): + write_with_length(dump_pickle((aid, accum._value)), old_stdout) if __name__ == '__main__': From 61b6382a352f3e801643529198b867e13debf470 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 20 Jan 2013 01:59:07 -0800 Subject: [PATCH 120/291] Launch accumulator tests in run-tests --- python/run-tests | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/run-tests b/python/run-tests index fcdd1e27a7..32470911f9 100755 --- a/python/run-tests +++ b/python/run-tests @@ -11,6 +11,9 @@ FAILED=$(($?||$FAILED)) $FWDIR/pyspark -m doctest pyspark/broadcast.py FAILED=$(($?||$FAILED)) +$FWDIR/pyspark -m doctest pyspark/accumulators.py +FAILED=$(($?||$FAILED)) + if [[ $FAILED != 0 ]]; then echo -en "\033[31m" # Red echo "Had test failures; see logs." From a23ed25f3cd6e76784f831d0ab7de7d3e193b59f Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 20 Jan 2013 02:10:25 -0800 Subject: [PATCH 121/291] Add a class comment to Accumulator --- python/pyspark/accumulators.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py index 438af4cfc0..c00c3a37af 100644 --- a/python/pyspark/accumulators.py +++ b/python/pyspark/accumulators.py @@ -76,6 +76,18 @@ def _deserialize_accumulator(aid, zero_value, accum_param): class Accumulator(object): + """ + A shared variable that can be accumulated, i.e., has a commutative and associative "add" + operation. Worker tasks on a Spark cluster can add values to an Accumulator with the C{+=} + operator, but only the driver program is allowed to access its value, using C{value}. + Updates from the workers get propagated automatically to the driver program. + + While C{SparkContext} supports accumulators for primitive data types like C{int} and + C{float}, users can also define accumulators for custom types by providing a custom + C{AccumulatorParam} object with a C{zero} and C{addInPlace} method. Refer to the doctest + of this module for an example. + """ + def __init__(self, aid, value, accum_param): """Create a new Accumulator with a given initial value and AccumulatorParam object""" from pyspark.accumulators import _accumulatorRegistry From ee5a07955c222dce16d0ffb9bde7f61033763c16 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 20 Jan 2013 02:11:58 -0800 Subject: [PATCH 122/291] Fix Python guide to say accumulators are available --- docs/python-programming-guide.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md index 78ef310a00..a840b9b34b 100644 --- a/docs/python-programming-guide.md +++ b/docs/python-programming-guide.md @@ -16,7 +16,6 @@ There are a few key differences between the Python and Scala APIs: * Python is dynamically typed, so RDDs can hold objects of different types. 
* PySpark does not currently support the following Spark features: - - Accumulators - Special functions on RDDs of doubles, such as `mean` and `stdev` - `lookup` - `persist` at storage levels other than `MEMORY_ONLY` From 33bad85bb9143d41bc5de2068f7e8a8c39928225 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Sun, 20 Jan 2013 03:51:11 -0800 Subject: [PATCH 123/291] Fixed streaming testsuite bugs --- streaming/src/test/java/JavaAPISuite.java | 2 ++ .../test/scala/spark/streaming/BasicOperationsSuite.scala | 5 +++++ .../src/test/scala/spark/streaming/CheckpointSuite.scala | 6 +++--- streaming/src/test/scala/spark/streaming/FailureSuite.scala | 3 +++ .../src/test/scala/spark/streaming/InputStreamsSuite.scala | 3 +++ .../src/test/scala/spark/streaming/TestSuiteBase.scala | 6 +++--- .../test/scala/spark/streaming/WindowOperationsSuite.scala | 5 +++++ 7 files changed, 24 insertions(+), 6 deletions(-) diff --git a/streaming/src/test/java/JavaAPISuite.java b/streaming/src/test/java/JavaAPISuite.java index 8c94e13e65..c84e7331c7 100644 --- a/streaming/src/test/java/JavaAPISuite.java +++ b/streaming/src/test/java/JavaAPISuite.java @@ -34,12 +34,14 @@ public class JavaAPISuite implements Serializable { @Before public void setUp() { ssc = new JavaStreamingContext("local[2]", "test", new Duration(1000)); + ssc.checkpoint("checkpoint", new Duration(1000)); } @After public void tearDown() { ssc.stop(); ssc = null; + // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown System.clearProperty("spark.master.port"); } diff --git a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala b/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala index f73f9b1823..bfdf32c73e 100644 --- a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala @@ -8,6 +8,11 @@ class BasicOperationsSuite extends TestSuiteBase { override def framework() = "BasicOperationsSuite" + after { + // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown + System.clearProperty("spark.master.port") + } + test("map") { val input = Seq(1 to 4, 5 to 8, 9 to 12) testOperation( diff --git a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala index 920388bba9..d2f32c189b 100644 --- a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala +++ b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala @@ -15,9 +15,11 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { } after { - if (ssc != null) ssc.stop() FileUtils.deleteDirectory(new File(checkpointDir)) + + // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown + System.clearProperty("spark.master.port") } var ssc: StreamingContext = null @@ -26,8 +28,6 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { override def batchDuration = Milliseconds(500) - override def checkpointDir = "checkpoint" - override def checkpointInterval = batchDuration override def actuallyWait = true diff --git a/streaming/src/test/scala/spark/streaming/FailureSuite.scala b/streaming/src/test/scala/spark/streaming/FailureSuite.scala index 4aa428bf64..7493ac1207 100644 --- a/streaming/src/test/scala/spark/streaming/FailureSuite.scala +++ b/streaming/src/test/scala/spark/streaming/FailureSuite.scala @@ -22,6 +22,9 @@ class FailureSuite extends TestSuiteBase with BeforeAndAfter { 
after { FailureSuite.reset() FileUtils.deleteDirectory(new File(checkpointDir)) + + // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown + System.clearProperty("spark.master.port") } override def framework = "CheckpointSuite" diff --git a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala index e71ba6ddc1..d7ba7a5d17 100644 --- a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala @@ -40,6 +40,9 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { FileUtils.deleteDirectory(testDir) testDir = null } + + // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown + System.clearProperty("spark.master.port") } test("network input stream") { diff --git a/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala index a76f61d4ad..49129f3964 100644 --- a/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala +++ b/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala @@ -10,7 +10,7 @@ import collection.mutable.SynchronizedBuffer import java.io.{ObjectInputStream, IOException} -import org.scalatest.FunSuite +import org.scalatest.{BeforeAndAfter, FunSuite} /** * This is a input stream just for the testsuites. This is equivalent to a checkpointable, @@ -56,7 +56,7 @@ class TestOutputStream[T: ClassManifest](parent: DStream[T], val output: ArrayBu * This is the base trait for Spark Streaming testsuites. This provides basic functionality * to run user-defined set of input on user-defined stream operations, and verify the output. */ -trait TestSuiteBase extends FunSuite with Logging { +trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { def framework = "TestSuiteBase" @@ -64,7 +64,7 @@ trait TestSuiteBase extends FunSuite with Logging { def batchDuration = Seconds(1) - def checkpointDir = null.asInstanceOf[String] + def checkpointDir = "checkpoint" def checkpointInterval = batchDuration diff --git a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala b/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala index f9ba1f20f0..0c6e928835 100644 --- a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala @@ -11,6 +11,11 @@ class WindowOperationsSuite extends TestSuiteBase { override def batchDuration = Seconds(1) + after { + // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown + System.clearProperty("spark.master.port") + } + val largerSlideInput = Seq( Seq(("a", 1)), Seq(("a", 2)), // 1st window from here From 5f74ead63643df83b04646c08e9bfc6b4b4a9ca9 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Sun, 20 Jan 2013 08:59:20 -0800 Subject: [PATCH 124/291] Changes based on Matei's comment --- docs/ec2-scripts.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/ec2-scripts.md b/docs/ec2-scripts.md index 8b069ca9ad..931b7a66bd 100644 --- a/docs/ec2-scripts.md +++ b/docs/ec2-scripts.md @@ -96,8 +96,9 @@ permissions on your private key file, you can run `launch` with the `spark-ec2` to attach a persistent EBS volume to each node for storing the persistent HDFS. 
- Finally, if you get errors while running your jobs, look at the slave's logs - for that job inside of the Mesos work directory (/mnt/mesos-work). Mesos errors - can be found using the Mesos web UI (`http://:8080`). + for that job inside of the Mesos work directory (/mnt/mesos-work). You can + also view the status of the cluster using the Mesos web UI + (`http://:8080`). # Configuration From 2a8c2a67909c4878ea24ec94f203287e55dd3782 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 20 Jan 2013 10:24:53 -0800 Subject: [PATCH 125/291] Minor formatting fixes --- examples/src/main/scala/spark/examples/SparkALS.scala | 4 ++-- python/examples/als.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/src/main/scala/spark/examples/SparkALS.scala b/examples/src/main/scala/spark/examples/SparkALS.scala index 2766ad1702..5e01885dbb 100644 --- a/examples/src/main/scala/spark/examples/SparkALS.scala +++ b/examples/src/main/scala/spark/examples/SparkALS.scala @@ -75,8 +75,8 @@ object SparkALS { (0 to 5).map(i => { i match { - case a if a < args.length => Option(args(a)) - case _ => Option(null) + case a if a < args.length => Some(args(a)) + case _ => None } }).toArray match { case Array(host_, m, u, f, iters, slices_) => { diff --git a/python/examples/als.py b/python/examples/als.py index 284cf0d3a2..010f80097f 100755 --- a/python/examples/als.py +++ b/python/examples/als.py @@ -68,4 +68,4 @@ if __name__ == "__main__": error = rmse(R, ms, us) print "Iteration %d:" % i - print "\nRMSE: %5.4f\n" % error \ No newline at end of file + print "\nRMSE: %5.4f\n" % error From ea739251eb763b756a282534268e765b8d4b70f0 Mon Sep 17 00:00:00 2001 From: seanm Date: Sun, 20 Jan 2013 11:29:21 -0700 Subject: [PATCH 126/291] adding updateStateByKey object lifecycle test --- .../streaming/BasicOperationsSuite.scala | 45 +++++++++++++++++++ .../scala/spark/streaming/TestSuiteBase.scala | 5 +++ 2 files changed, 50 insertions(+) diff --git a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala b/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala index f73f9b1823..2bc94463b1 100644 --- a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala @@ -160,6 +160,51 @@ class BasicOperationsSuite extends TestSuiteBase { testOperation(inputData, updateStateOperation, outputData, true) } + test("updateStateByKey - object lifecycle") { + val inputData = + Seq( + Seq("a","b"), + null, + Seq("a","c","a"), + Seq("c"), + null, + null + ) + + val outputData = + Seq( + Seq(("a", 1), ("b", 1)), + Seq(("a", 1), ("b", 1)), + Seq(("a", 3), ("c", 1)), + Seq(("a", 3), ("c", 2)), + Seq(("c", 2)), + Seq() + ) + + val updateStateOperation = (s: DStream[String]) => { + class StateObject(var counter: Int = 0, var expireCounter: Int = 0) extends Serializable + + // updateFunc clears a state when a StateObject is seen without new values twice in a row + val updateFunc = (values: Seq[Int], state: Option[StateObject]) => { + val stateObj = state.getOrElse(new StateObject) + values.foldLeft(0)(_ + _) match { + case 0 => stateObj.expireCounter += 1 // no new values + case n => { // has new values, increment and reset expireCounter + stateObj.counter += n + stateObj.expireCounter = 0 + } + } + stateObj.expireCounter match { + case 2 => None // seen twice with no new values, give it the boot + case _ => Option(stateObj) + } + } + s.map(_ -> 1).updateStateByKey[StateObject](updateFunc).mapValues(_.counter) + } + + 
testOperation(inputData, updateStateOperation, outputData, true) + } + test("forgetting of RDDs - map and window operations") { assert(batchDuration === Seconds(1), "Batch duration has changed from 1 second") diff --git a/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala index a76f61d4ad..11cfcba827 100644 --- a/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala +++ b/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala @@ -28,6 +28,11 @@ class TestInputStream[T: ClassManifest](ssc_ : StreamingContext, input: Seq[Seq[ logInfo("Computing RDD for time " + validTime) val index = ((validTime - zeroTime) / slideDuration - 1).toInt val selectedInput = if (index < input.size) input(index) else Seq[T]() + + // lets us test cases where RDDs are not created + if (selectedInput == null) + return None + val rdd = ssc.sc.makeRDD(selectedInput, numPartitions) logInfo("Created RDD " + rdd.id + " with " + selectedInput) Some(rdd) From c0694291c81ad775918421941a80a00ca9593a38 Mon Sep 17 00:00:00 2001 From: seanm Date: Sun, 20 Jan 2013 12:09:45 -0700 Subject: [PATCH 127/291] Splitting StreamingContext.queueStream into two methods --- .../spark/streaming/StreamingContext.scala | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/spark/streaming/StreamingContext.scala index 14500bdcb1..3cec35cb37 100644 --- a/streaming/src/main/scala/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/spark/streaming/StreamingContext.scala @@ -283,17 +283,31 @@ class StreamingContext private ( } /** - * Creates a input stream from an queue of RDDs. In each batch, + * Creates an input stream from a queue of RDDs. In each batch, * it will process either one or all of the RDDs returned by the queue. * @param queue Queue of RDDs * @param oneAtATime Whether only one RDD should be consumed from the queue in every interval - * @param defaultRDD Default RDD is returned by the DStream when the queue is empty * @tparam T Type of objects in the RDD */ def queueStream[T: ClassManifest]( queue: Queue[RDD[T]], - oneAtATime: Boolean = true, - defaultRDD: RDD[T] = null + oneAtATime: Boolean = true + ): DStream[T] = { + queueStream(queue, oneAtATime, sc.makeRDD(Seq[T](), 1)) + } + + /** + * Creates an input stream from a queue of RDDs. In each batch, + * it will process either one or all of the RDDs returned by the queue. + * @param queue Queue of RDDs + * @param oneAtATime Whether only one RDD should be consumed from the queue in every interval + * @param defaultRDD Default RDD is returned by the DStream when the queue is empty. 
Set as null if no RDD should be returned when empty + * @tparam T Type of objects in the RDD + */ + def queueStream[T: ClassManifest]( + queue: Queue[RDD[T]], + oneAtATime: Boolean, + defaultRDD: RDD[T] ): DStream[T] = { val inputStream = new QueueInputDStream(this, queue, oneAtATime, defaultRDD) registerInputStream(inputStream) From 17035db159e191a11cd86882c97078581073deb2 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sun, 20 Jan 2013 11:22:38 -0800 Subject: [PATCH 128/291] Add __repr__ to Accumulator; fix bug in sc.accumulator --- python/pyspark/accumulators.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py index c00c3a37af..8011779ddc 100644 --- a/python/pyspark/accumulators.py +++ b/python/pyspark/accumulators.py @@ -11,6 +11,12 @@ >>> a.value 7 +>>> sc.accumulator(1.0).value +1.0 + +>>> sc.accumulator(1j).value +1j + >>> rdd = sc.parallelize([1,2,3]) >>> def f(x): ... global a @@ -124,6 +130,9 @@ class Accumulator(object): def __str__(self): return str(self._value) + def __repr__(self): + return "Accumulator" % (self.aid, self._value) + class AddingAccumulatorParam(object): """ @@ -145,7 +154,7 @@ class AddingAccumulatorParam(object): # Singleton accumulator params for some standard types INT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0) -DOUBLE_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0) +FLOAT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0) COMPLEX_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0j) From 7ed1bf4b485131d58ea6728e7247b79320aca9e6 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Wed, 16 Jan 2013 19:15:14 -0800 Subject: [PATCH 129/291] Add RDD checkpointing to Python API. --- .../scala/spark/api/python/PythonRDD.scala | 3 -- python/epydoc.conf | 2 +- python/pyspark/context.py | 9 ++++ python/pyspark/rdd.py | 34 ++++++++++++++ python/pyspark/tests.py | 46 +++++++++++++++++++ python/run-tests | 3 ++ 6 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 python/pyspark/tests.py diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 89f7c316dc..8c38262dd8 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -135,8 +135,6 @@ private[spark] class PythonRDD[T: ClassManifest]( } } - override def checkpoint() { } - val asJavaRDD : JavaRDD[Array[Byte]] = JavaRDD.fromRDD(this) } @@ -152,7 +150,6 @@ private class PairwiseRDD(prev: RDD[Array[Byte]]) extends case Seq(a, b) => (a, b) case x => throw new Exception("PairwiseRDD: unexpected value: " + x) } - override def checkpoint() { } val asJavaPairRDD : JavaPairRDD[Array[Byte], Array[Byte]] = JavaPairRDD.fromRDD(this) } diff --git a/python/epydoc.conf b/python/epydoc.conf index 91ac984ba2..45102cd9fe 100644 --- a/python/epydoc.conf +++ b/python/epydoc.conf @@ -16,4 +16,4 @@ target: docs/ private: no exclude: pyspark.cloudpickle pyspark.worker pyspark.join pyspark.serializers - pyspark.java_gateway pyspark.examples pyspark.shell + pyspark.java_gateway pyspark.examples pyspark.shell pyspark.test diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 1e2f845f9c..a438b43fdc 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -195,3 +195,12 @@ class SparkContext(object): filename = path.split("/")[-1] os.environ["PYTHONPATH"] = \ "%s:%s" % (filename, os.environ["PYTHONPATH"]) + + def setCheckpointDir(self, dirName, useExisting=False): + """ + Set 
the directory under which RDDs are going to be checkpointed. This + method will create this directory and will throw an exception of the + path already exists (to avoid overwriting existing files may be + overwritten). The directory will be deleted on exit if indicated. + """ + self._jsc.sc().setCheckpointDir(dirName, useExisting) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index d705f0f9e1..9b676cae4a 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -49,6 +49,40 @@ class RDD(object): self._jrdd.cache() return self + def checkpoint(self): + """ + Mark this RDD for checkpointing. The RDD will be saved to a file inside + `checkpointDir` (set using setCheckpointDir()) and all references to + its parent RDDs will be removed. This is used to truncate very long + lineages. In the current implementation, Spark will save this RDD to + a file (using saveAsObjectFile()) after the first job using this RDD is + done. Hence, it is strongly recommended to use checkpoint() on RDDs + when + + (i) checkpoint() is called before the any job has been executed on this + RDD. + + (ii) This RDD has been made to persist in memory. Otherwise saving it + on a file will require recomputation. + """ + self._jrdd.rdd().checkpoint() + + def isCheckpointed(self): + """ + Return whether this RDD has been checkpointed or not + """ + return self._jrdd.rdd().isCheckpointed() + + def getCheckpointFile(self): + """ + Gets the name of the file to which this RDD was checkpointed + """ + checkpointFile = self._jrdd.rdd().getCheckpointFile() + if checkpointFile.isDefined(): + return checkpointFile.get() + else: + return None + # TODO persist(self, storageLevel) def map(self, f, preservesPartitioning=False): diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py new file mode 100644 index 0000000000..c959d5dec7 --- /dev/null +++ b/python/pyspark/tests.py @@ -0,0 +1,46 @@ +""" +Unit tests for PySpark; additional tests are implemented as doctests in +individual modules. +""" +import atexit +import os +import shutil +from tempfile import NamedTemporaryFile +import time +import unittest + +from pyspark.context import SparkContext + + +class TestCheckpoint(unittest.TestCase): + + def setUp(self): + self.sc = SparkContext('local[4]', 'TestPartitioning', batchSize=2) + + def tearDown(self): + self.sc.stop() + + def test_basic_checkpointing(self): + checkpointDir = NamedTemporaryFile(delete=False) + os.unlink(checkpointDir.name) + self.sc.setCheckpointDir(checkpointDir.name) + + parCollection = self.sc.parallelize([1, 2, 3, 4]) + flatMappedRDD = parCollection.flatMap(lambda x: range(1, x + 1)) + + self.assertFalse(flatMappedRDD.isCheckpointed()) + self.assertIsNone(flatMappedRDD.getCheckpointFile()) + + flatMappedRDD.checkpoint() + result = flatMappedRDD.collect() + time.sleep(1) # 1 second + self.assertTrue(flatMappedRDD.isCheckpointed()) + self.assertEqual(flatMappedRDD.collect(), result) + self.assertEqual(checkpointDir.name, + os.path.dirname(flatMappedRDD.getCheckpointFile())) + + atexit.register(lambda: shutil.rmtree(checkpointDir.name)) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/run-tests b/python/run-tests index 32470911f9..ce214e98a8 100755 --- a/python/run-tests +++ b/python/run-tests @@ -14,6 +14,9 @@ FAILED=$(($?||$FAILED)) $FWDIR/pyspark -m doctest pyspark/accumulators.py FAILED=$(($?||$FAILED)) +$FWDIR/pyspark -m unittest pyspark.tests +FAILED=$(($?||$FAILED)) + if [[ $FAILED != 0 ]]; then echo -en "\033[31m" # Red echo "Had test failures; see logs." 
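This patch gives PySpark the same checkpointing surface as the Scala API: SparkContext.setCheckpointDir() plus RDD.checkpoint(), isCheckpointed() and getCheckpointFile(). The condensed sketch below mirrors the flow exercised by the new pyspark/tests.py above; it is illustrative only -- the temporary-directory handling is simplified, and the one-second sleep copies the test's wait for the checkpoint write rather than any documented guarantee.

    import os, time
    from tempfile import NamedTemporaryFile
    from pyspark.context import SparkContext

    sc = SparkContext("local[4]", "checkpoint-sketch", batchSize=2)
    checkpointDir = NamedTemporaryFile(delete=False)
    os.unlink(checkpointDir.name)        # hand setCheckpointDir a path that does not exist yet
    sc.setCheckpointDir(checkpointDir.name)

    rdd = sc.parallelize([1, 2, 3, 4]).flatMap(lambda x: range(1, x + 1))
    rdd.checkpoint()                     # mark before any job has run on this RDD
    print rdd.isCheckpointed()           # False: nothing has been materialized yet
    result = rdd.collect()               # the first job also writes the checkpoint
    time.sleep(1)                        # as in the test, give the write a moment to finish
    print rdd.isCheckpointed()           # True
    print rdd.getCheckpointFile()        # a file under checkpointDir.name

The next patch in the series adds SparkContext._checkpointFile() so an RDD written this way can be read back and compared against the original, which is what the new test_checkpoint_and_restore test does.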
From d0ba80dc727d00b2b7627dcefd2c77009af55f7d Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sun, 20 Jan 2013 13:59:45 -0800 Subject: [PATCH 130/291] Add checkpointFile() and more tests to PySpark. --- python/pyspark/context.py | 6 +++++- python/pyspark/rdd.py | 9 ++++++++- python/pyspark/tests.py | 24 ++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index a438b43fdc..8beb8e2ae9 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -123,6 +123,10 @@ class SparkContext(object): jrdd = self._jsc.textFile(name, minSplits) return RDD(jrdd, self) + def _checkpointFile(self, name): + jrdd = self._jsc.checkpointFile(name) + return RDD(jrdd, self) + def union(self, rdds): """ Build the union of a list of RDDs. @@ -145,7 +149,7 @@ class SparkContext(object): def accumulator(self, value, accum_param=None): """ Create an C{Accumulator} with the given initial value, using a given - AccumulatorParam helper object to define how to add values of the data + AccumulatorParam helper object to define how to add values of the data type if provided. Default AccumulatorParams are used for integers and floating-point numbers if you do not provide one. For other types, the AccumulatorParam must implement two methods: diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 9b676cae4a..2a2ff9b271 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -32,6 +32,7 @@ class RDD(object): def __init__(self, jrdd, ctx): self._jrdd = jrdd self.is_cached = False + self.is_checkpointed = False self.ctx = ctx @property @@ -65,6 +66,7 @@ class RDD(object): (ii) This RDD has been made to persist in memory. Otherwise saving it on a file will require recomputation. 
""" + self.is_checkpointed = True self._jrdd.rdd().checkpoint() def isCheckpointed(self): @@ -696,7 +698,7 @@ class PipelinedRDD(RDD): 20 """ def __init__(self, prev, func, preservesPartitioning=False): - if isinstance(prev, PipelinedRDD) and not prev.is_cached: + if isinstance(prev, PipelinedRDD) and prev._is_pipelinable: prev_func = prev.func def pipeline_func(split, iterator): return func(split, prev_func(split, iterator)) @@ -709,6 +711,7 @@ class PipelinedRDD(RDD): self.preservesPartitioning = preservesPartitioning self._prev_jrdd = prev._jrdd self.is_cached = False + self.is_checkpointed = False self.ctx = prev.ctx self.prev = prev self._jrdd_val = None @@ -741,6 +744,10 @@ class PipelinedRDD(RDD): self._jrdd_val = python_rdd.asJavaRDD() return self._jrdd_val + @property + def _is_pipelinable(self): + return not (self.is_cached or self.is_checkpointed) + def _test(): import doctest diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index c959d5dec7..83283fca4f 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -19,6 +19,9 @@ class TestCheckpoint(unittest.TestCase): def tearDown(self): self.sc.stop() + # To avoid Akka rebinding to the same port, since it doesn't unbind + # immediately on shutdown + self.sc.jvm.System.clearProperty("spark.master.port") def test_basic_checkpointing(self): checkpointDir = NamedTemporaryFile(delete=False) @@ -41,6 +44,27 @@ class TestCheckpoint(unittest.TestCase): atexit.register(lambda: shutil.rmtree(checkpointDir.name)) + def test_checkpoint_and_restore(self): + checkpointDir = NamedTemporaryFile(delete=False) + os.unlink(checkpointDir.name) + self.sc.setCheckpointDir(checkpointDir.name) + + parCollection = self.sc.parallelize([1, 2, 3, 4]) + flatMappedRDD = parCollection.flatMap(lambda x: [x]) + + self.assertFalse(flatMappedRDD.isCheckpointed()) + self.assertIsNone(flatMappedRDD.getCheckpointFile()) + + flatMappedRDD.checkpoint() + flatMappedRDD.count() # forces a checkpoint to be computed + time.sleep(1) # 1 second + + self.assertIsNotNone(flatMappedRDD.getCheckpointFile()) + recovered = self.sc._checkpointFile(flatMappedRDD.getCheckpointFile()) + self.assertEquals([1, 2, 3, 4], recovered.collect()) + + atexit.register(lambda: shutil.rmtree(checkpointDir.name)) + if __name__ == "__main__": unittest.main() From 5b6ea9e9a04994553d0319c541ca356e2e3064a7 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sun, 20 Jan 2013 15:31:41 -0800 Subject: [PATCH 131/291] Update checkpointing API docs in Python/Java. --- .../main/scala/spark/api/java/JavaRDDLike.scala | 15 ++++++--------- .../scala/spark/api/java/JavaSparkContext.scala | 17 +++++++++-------- python/pyspark/context.py | 11 +++++++---- python/pyspark/rdd.py | 17 +++++------------ 4 files changed, 27 insertions(+), 33 deletions(-) diff --git a/core/src/main/scala/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/spark/api/java/JavaRDDLike.scala index 087270e46d..b3698ffa44 100644 --- a/core/src/main/scala/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/spark/api/java/JavaRDDLike.scala @@ -307,16 +307,13 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { implicit val kcm: ClassManifest[K] = implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[K]] JavaPairRDD.fromRDD(rdd.keyBy(f)) } - + /** - * Mark this RDD for checkpointing. The RDD will be saved to a file inside `checkpointDir` - * (set using setCheckpointDir()) and all references to its parent RDDs will be removed. - * This is used to truncate very long lineages. 
In the current implementation, Spark will save - * this RDD to a file (using saveAsObjectFile()) after the first job using this RDD is done. - * Hence, it is strongly recommended to use checkpoint() on RDDs when - * (i) checkpoint() is called before the any job has been executed on this RDD. - * (ii) This RDD has been made to persist in memory. Otherwise saving it on a file will - * require recomputation. + * Mark this RDD for checkpointing. It will be saved to a file inside the checkpoint + * directory set with SparkContext.setCheckpointDir() and all references to its parent + * RDDs will be removed. This function must be called before any job has been + * executed on this RDD. It is strongly recommended that this RDD is persisted in + * memory, otherwise saving it on a file will require recomputation. */ def checkpoint() = rdd.checkpoint() diff --git a/core/src/main/scala/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/spark/api/java/JavaSparkContext.scala index fa2f14113d..14699961ad 100644 --- a/core/src/main/scala/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/spark/api/java/JavaSparkContext.scala @@ -357,20 +357,21 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork } /** - * Set the directory under which RDDs are going to be checkpointed. This method will - * create this directory and will throw an exception of the path already exists (to avoid - * overwriting existing files may be overwritten). The directory will be deleted on exit - * if indicated. + * Set the directory under which RDDs are going to be checkpointed. The directory must + * be a HDFS path if running on a cluster. If the directory does not exist, it will + * be created. If the directory exists and useExisting is set to true, then the + * exisiting directory will be used. Otherwise an exception will be thrown to + * prevent accidental overriding of checkpoint files in the existing directory. */ def setCheckpointDir(dir: String, useExisting: Boolean) { sc.setCheckpointDir(dir, useExisting) } /** - * Set the directory under which RDDs are going to be checkpointed. This method will - * create this directory and will throw an exception of the path already exists (to avoid - * overwriting existing files may be overwritten). The directory will be deleted on exit - * if indicated. + * Set the directory under which RDDs are going to be checkpointed. The directory must + * be a HDFS path if running on a cluster. If the directory does not exist, it will + * be created. If the directory exists, an exception will be thrown to prevent accidental + * overriding of checkpoint files. */ def setCheckpointDir(dir: String) { sc.setCheckpointDir(dir) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 8beb8e2ae9..dcbed37270 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -202,9 +202,12 @@ class SparkContext(object): def setCheckpointDir(self, dirName, useExisting=False): """ - Set the directory under which RDDs are going to be checkpointed. This - method will create this directory and will throw an exception of the - path already exists (to avoid overwriting existing files may be - overwritten). The directory will be deleted on exit if indicated. + Set the directory under which RDDs are going to be checkpointed. The + directory must be a HDFS path if running on a cluster. + + If the directory does not exist, it will be created. If the directory + exists and C{useExisting} is set to true, then the exisiting directory + will be used. 
Otherwise an exception will be thrown to prevent + accidental overriding of checkpoint files in the existing directory. """ self._jsc.sc().setCheckpointDir(dirName, useExisting) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 2a2ff9b271..7b6ab956ee 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -52,18 +52,11 @@ class RDD(object): def checkpoint(self): """ - Mark this RDD for checkpointing. The RDD will be saved to a file inside - `checkpointDir` (set using setCheckpointDir()) and all references to - its parent RDDs will be removed. This is used to truncate very long - lineages. In the current implementation, Spark will save this RDD to - a file (using saveAsObjectFile()) after the first job using this RDD is - done. Hence, it is strongly recommended to use checkpoint() on RDDs - when - - (i) checkpoint() is called before the any job has been executed on this - RDD. - - (ii) This RDD has been made to persist in memory. Otherwise saving it + Mark this RDD for checkpointing. It will be saved to a file inside the + checkpoint directory set with L{SparkContext.setCheckpointDir()} and + all references to its parent RDDs will be removed. This function must + be called before any job has been executed on this RDD. It is strongly + recommended that this RDD is persisted in memory, otherwise saving it on a file will require recomputation. """ self.is_checkpointed = True From 00d70cd6602d5ff2718e319ec04defbdd486237e Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sun, 20 Jan 2013 15:38:11 -0800 Subject: [PATCH 132/291] Clean up setup code in PySpark checkpointing tests --- python/pyspark/rdd.py | 3 +-- python/pyspark/tests.py | 19 +++++-------------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 7b6ab956ee..097cdb13b4 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -691,7 +691,7 @@ class PipelinedRDD(RDD): 20 """ def __init__(self, prev, func, preservesPartitioning=False): - if isinstance(prev, PipelinedRDD) and prev._is_pipelinable: + if isinstance(prev, PipelinedRDD) and prev._is_pipelinable(): prev_func = prev.func def pipeline_func(split, iterator): return func(split, prev_func(split, iterator)) @@ -737,7 +737,6 @@ class PipelinedRDD(RDD): self._jrdd_val = python_rdd.asJavaRDD() return self._jrdd_val - @property def _is_pipelinable(self): return not (self.is_cached or self.is_checkpointed) diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 83283fca4f..b0a403b580 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -2,7 +2,6 @@ Unit tests for PySpark; additional tests are implemented as doctests in individual modules. 
""" -import atexit import os import shutil from tempfile import NamedTemporaryFile @@ -16,18 +15,18 @@ class TestCheckpoint(unittest.TestCase): def setUp(self): self.sc = SparkContext('local[4]', 'TestPartitioning', batchSize=2) + self.checkpointDir = NamedTemporaryFile(delete=False) + os.unlink(self.checkpointDir.name) + self.sc.setCheckpointDir(self.checkpointDir.name) def tearDown(self): self.sc.stop() # To avoid Akka rebinding to the same port, since it doesn't unbind # immediately on shutdown self.sc.jvm.System.clearProperty("spark.master.port") + shutil.rmtree(self.checkpointDir.name) def test_basic_checkpointing(self): - checkpointDir = NamedTemporaryFile(delete=False) - os.unlink(checkpointDir.name) - self.sc.setCheckpointDir(checkpointDir.name) - parCollection = self.sc.parallelize([1, 2, 3, 4]) flatMappedRDD = parCollection.flatMap(lambda x: range(1, x + 1)) @@ -39,16 +38,10 @@ class TestCheckpoint(unittest.TestCase): time.sleep(1) # 1 second self.assertTrue(flatMappedRDD.isCheckpointed()) self.assertEqual(flatMappedRDD.collect(), result) - self.assertEqual(checkpointDir.name, + self.assertEqual(self.checkpointDir.name, os.path.dirname(flatMappedRDD.getCheckpointFile())) - atexit.register(lambda: shutil.rmtree(checkpointDir.name)) - def test_checkpoint_and_restore(self): - checkpointDir = NamedTemporaryFile(delete=False) - os.unlink(checkpointDir.name) - self.sc.setCheckpointDir(checkpointDir.name) - parCollection = self.sc.parallelize([1, 2, 3, 4]) flatMappedRDD = parCollection.flatMap(lambda x: [x]) @@ -63,8 +56,6 @@ class TestCheckpoint(unittest.TestCase): recovered = self.sc._checkpointFile(flatMappedRDD.getCheckpointFile()) self.assertEquals([1, 2, 3, 4], recovered.collect()) - atexit.register(lambda: shutil.rmtree(checkpointDir.name)) - if __name__ == "__main__": unittest.main() From 9f211dd3f0132daf72fb39883fa4b28e4fd547ca Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 14 Jan 2013 15:30:42 -0800 Subject: [PATCH 133/291] Fix PythonPartitioner equality; see SPARK-654. PythonPartitioner did not take the Python-side partitioning function into account when checking for equality, which might cause problems in the future. --- .../spark/api/python/PythonPartitioner.scala | 13 +++++++++++-- .../main/scala/spark/api/python/PythonRDD.scala | 5 ----- python/pyspark/rdd.py | 17 +++++++++++------ 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonPartitioner.scala b/core/src/main/scala/spark/api/python/PythonPartitioner.scala index 648d9402b0..519e310323 100644 --- a/core/src/main/scala/spark/api/python/PythonPartitioner.scala +++ b/core/src/main/scala/spark/api/python/PythonPartitioner.scala @@ -6,8 +6,17 @@ import java.util.Arrays /** * A [[spark.Partitioner]] that performs handling of byte arrays, for use by the Python API. + * + * Stores the unique id() of the Python-side partitioning function so that it is incorporated into + * equality comparisons. Correctness requires that the id is a unique identifier for the + * lifetime of the job (i.e. that it is not re-used as the id of a different partitioning + * function). This can be ensured by using the Python id() function and maintaining a reference + * to the Python partitioning function so that its id() is not reused. 
*/ -private[spark] class PythonPartitioner(override val numPartitions: Int) extends Partitioner { +private[spark] class PythonPartitioner( + override val numPartitions: Int, + val pyPartitionFunctionId: Long) + extends Partitioner { override def getPartition(key: Any): Int = { if (key == null) { @@ -32,7 +41,7 @@ private[spark] class PythonPartitioner(override val numPartitions: Int) extends override def equals(other: Any): Boolean = other match { case h: PythonPartitioner => - h.numPartitions == numPartitions + h.numPartitions == numPartitions && h.pyPartitionFunctionId == pyPartitionFunctionId case _ => false } diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 89f7c316dc..e4c0530241 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -252,11 +252,6 @@ private object Pickle { val APPENDS: Byte = 'e' } -private class ExtractValue extends spark.api.java.function.Function[(Array[Byte], - Array[Byte]), Array[Byte]] { - override def call(pair: (Array[Byte], Array[Byte])) : Array[Byte] = pair._2 -} - private class BytesToString extends spark.api.java.function.Function[Array[Byte], String] { override def call(arr: Array[Byte]) : String = new String(arr, "UTF-8") } diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index d705f0f9e1..b58bf24e3e 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -33,6 +33,7 @@ class RDD(object): self._jrdd = jrdd self.is_cached = False self.ctx = ctx + self._partitionFunc = None @property def context(self): @@ -497,7 +498,7 @@ class RDD(object): return python_right_outer_join(self, other, numSplits) # TODO: add option to control map-side combining - def partitionBy(self, numSplits, hashFunc=hash): + def partitionBy(self, numSplits, partitionFunc=hash): """ Return a copy of the RDD partitioned using the specified partitioner. 
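        A short sketch of the new C{partitionFunc} keyword (assumes an existing
        SparkContext C{sc}); any function from key to int can be supplied, and
        the returned RDD keeps a reference to it so that C{id(partitionFunc)}
        remains a valid identifier for the partitioner's lifetime:

            pairs = sc.parallelize([("a", 1), ("b", 2), ("a", 3)])
            parted = pairs.partitionBy(2, partitionFunc=lambda k: ord(k[0]))
            parted.collect()   # same elements, now hash-partitioned into 2 splits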
@@ -514,17 +515,21 @@ class RDD(object): def add_shuffle_key(split, iterator): buckets = defaultdict(list) for (k, v) in iterator: - buckets[hashFunc(k) % numSplits].append((k, v)) + buckets[partitionFunc(k) % numSplits].append((k, v)) for (split, items) in buckets.iteritems(): yield str(split) yield dump_pickle(Batch(items)) keyed = PipelinedRDD(self, add_shuffle_key) keyed._bypass_serializer = True pairRDD = self.ctx.jvm.PairwiseRDD(keyed._jrdd.rdd()).asJavaPairRDD() - partitioner = self.ctx.jvm.spark.api.python.PythonPartitioner(numSplits) - jrdd = pairRDD.partitionBy(partitioner) - jrdd = jrdd.map(self.ctx.jvm.ExtractValue()) - return RDD(jrdd, self.ctx) + partitioner = self.ctx.jvm.PythonPartitioner(numSplits, + id(partitionFunc)) + jrdd = pairRDD.partitionBy(partitioner).values() + rdd = RDD(jrdd, self.ctx) + # This is required so that id(partitionFunc) remains unique, even if + # partitionFunc is a lambda: + rdd._partitionFunc = partitionFunc + return rdd # TODO: add control over map-side aggregation def combineByKey(self, createCombiner, mergeValue, mergeCombiners, From 6e3754bf4759ab3e1e1be978b6b84e6f17742106 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 20 Jan 2013 19:22:24 -0800 Subject: [PATCH 134/291] Add Maven build file for streaming, and fix some issues in SBT file As part of this, changed our Scala 2.9.2 Kafka library to be available as a local Maven repository, following the example in (http://blog.dub.podval.org/2010/01/maven-in-project-repository.html) --- examples/pom.xml | 17 ++ pom.xml | 12 ++ project/SparkBuild.scala | 16 +- repl/pom.xml | 14 ++ .../kafka/0.7.2-spark/kafka-0.7.2-spark.jar} | Bin .../0.7.2-spark/kafka-0.7.2-spark.jar.md5 | 1 + .../0.7.2-spark/kafka-0.7.2-spark.jar.sha1 | 1 + .../kafka/0.7.2-spark/kafka-0.7.2-spark.pom | 9 + .../0.7.2-spark/kafka-0.7.2-spark.pom.md5 | 1 + .../0.7.2-spark/kafka-0.7.2-spark.pom.sha1 | 1 + .../kafka/kafka/maven-metadata-local.xml | 12 ++ .../kafka/kafka/maven-metadata-local.xml.md5 | 1 + .../kafka/kafka/maven-metadata-local.xml.sha1 | 1 + streaming/pom.xml | 155 ++++++++++++++++++ 14 files changed, 234 insertions(+), 7 deletions(-) rename streaming/lib/{kafka-0.7.2.jar => org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.jar} (100%) create mode 100644 streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.jar.md5 create mode 100644 streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.jar.sha1 create mode 100644 streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.pom create mode 100644 streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.pom.md5 create mode 100644 streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.pom.sha1 create mode 100644 streaming/lib/org/apache/kafka/kafka/maven-metadata-local.xml create mode 100644 streaming/lib/org/apache/kafka/kafka/maven-metadata-local.xml.md5 create mode 100644 streaming/lib/org/apache/kafka/kafka/maven-metadata-local.xml.sha1 create mode 100644 streaming/pom.xml diff --git a/examples/pom.xml b/examples/pom.xml index 3355deb6b7..4d43103475 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -19,6 +19,11 @@ org.eclipse.jetty jetty-server + + org.twitter4j + twitter4j-stream + 3.0.3 + org.scalatest @@ -57,6 +62,12 @@ ${project.version} hadoop1 + + org.spark-project + spark-streaming + ${project.version} + hadoop1 + org.apache.hadoop hadoop-core @@ -90,6 +101,12 @@ ${project.version} hadoop2 + + org.spark-project + spark-streaming + ${project.version} + hadoop2 + org.apache.hadoop hadoop-core 
diff --git a/pom.xml b/pom.xml index 751189a9d8..483b0f9595 100644 --- a/pom.xml +++ b/pom.xml @@ -41,6 +41,7 @@ core bagel examples + streaming repl repl-bin @@ -104,6 +105,17 @@ false + + twitter4j-repo + Twitter4J Repository + http://twitter4j.org/maven2/ + + true + + + false + + diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 3dbb993f9c..03b8094f7d 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -21,7 +21,7 @@ object SparkBuild extends Build { lazy val core = Project("core", file("core"), settings = coreSettings) - lazy val repl = Project("repl", file("repl"), settings = replSettings) dependsOn (core) + lazy val repl = Project("repl", file("repl"), settings = replSettings) dependsOn (core) dependsOn (streaming) lazy val examples = Project("examples", file("examples"), settings = examplesSettings) dependsOn (core) dependsOn (streaming) @@ -92,8 +92,7 @@ object SparkBuild extends Build { "org.eclipse.jetty" % "jetty-server" % "7.5.3.v20111011", "org.scalatest" %% "scalatest" % "1.8" % "test", "org.scalacheck" %% "scalacheck" % "1.9" % "test", - "com.novocode" % "junit-interface" % "0.8" % "test", - "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" + "com.novocode" % "junit-interface" % "0.8" % "test" ), parallelExecution := false, /* Workaround for issue #206 (fixed after SBT 0.11.0) */ @@ -136,8 +135,6 @@ object SparkBuild extends Build { "com.typesafe.akka" % "akka-slf4j" % "2.0.3", "it.unimi.dsi" % "fastutil" % "6.4.4", "colt" % "colt" % "1.2.0", - "org.twitter4j" % "twitter4j-core" % "3.0.2", - "org.twitter4j" % "twitter4j-stream" % "3.0.2", "cc.spray" % "spray-can" % "1.0-M2.1", "cc.spray" % "spray-server" % "1.0-M2.1", "cc.spray" %% "spray-json" % "1.1.1", @@ -156,7 +153,10 @@ object SparkBuild extends Build { ) def examplesSettings = sharedSettings ++ Seq( - name := "spark-examples" + name := "spark-examples", + libraryDependencies ++= Seq( + "org.twitter4j" % "twitter4j-stream" % "3.0.3" + ) ) def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel") @@ -164,7 +164,9 @@ object SparkBuild extends Build { def streamingSettings = sharedSettings ++ Seq( name := "spark-streaming", libraryDependencies ++= Seq( - "com.github.sgroschupf" % "zkclient" % "0.1") + "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile", + "com.github.sgroschupf" % "zkclient" % "0.1" + ) ) ++ assemblySettings ++ extraAssemblySettings def extraAssemblySettings() = Seq(test in assembly := {}) ++ Seq( diff --git a/repl/pom.xml b/repl/pom.xml index 38e883c7f8..2fc9692969 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -101,6 +101,13 @@ hadoop1 runtime + + org.spark-project + spark-streaming + ${project.version} + hadoop1 + runtime + org.apache.hadoop hadoop-core @@ -151,6 +158,13 @@ hadoop2 runtime + + org.spark-project + spark-streaming + ${project.version} + hadoop2 + runtime + org.apache.hadoop hadoop-core diff --git a/streaming/lib/kafka-0.7.2.jar b/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.jar similarity index 100% rename from streaming/lib/kafka-0.7.2.jar rename to streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.jar diff --git a/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.jar.md5 b/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.jar.md5 new file mode 100644 index 0000000000..29f45f4adb --- /dev/null +++ b/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.jar.md5 @@ -0,0 +1 @@ +18876b8bc2e4cef28b6d191aa49d963f \ No newline at end of file diff 
--git a/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.jar.sha1 b/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.jar.sha1 new file mode 100644 index 0000000000..e3bd62bac0 --- /dev/null +++ b/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.jar.sha1 @@ -0,0 +1 @@ +06b27270ffa52250a2c08703b397c99127b72060 \ No newline at end of file diff --git a/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.pom b/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.pom new file mode 100644 index 0000000000..082d35726a --- /dev/null +++ b/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.pom @@ -0,0 +1,9 @@ + + + 4.0.0 + org.apache.kafka + kafka + 0.7.2-spark + POM was created from install:install-file + diff --git a/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.pom.md5 b/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.pom.md5 new file mode 100644 index 0000000000..92c4132b5b --- /dev/null +++ b/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.pom.md5 @@ -0,0 +1 @@ +7bc4322266e6032bdf9ef6eebdd8097d \ No newline at end of file diff --git a/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.pom.sha1 b/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.pom.sha1 new file mode 100644 index 0000000000..8a1d8a097a --- /dev/null +++ b/streaming/lib/org/apache/kafka/kafka/0.7.2-spark/kafka-0.7.2-spark.pom.sha1 @@ -0,0 +1 @@ +d0f79e8eff0db43ca7bcf7dce2c8cd2972685c9d \ No newline at end of file diff --git a/streaming/lib/org/apache/kafka/kafka/maven-metadata-local.xml b/streaming/lib/org/apache/kafka/kafka/maven-metadata-local.xml new file mode 100644 index 0000000000..720cd51c2f --- /dev/null +++ b/streaming/lib/org/apache/kafka/kafka/maven-metadata-local.xml @@ -0,0 +1,12 @@ + + + org.apache.kafka + kafka + + 0.7.2-spark + + 0.7.2-spark + + 20130121015225 + + diff --git a/streaming/lib/org/apache/kafka/kafka/maven-metadata-local.xml.md5 b/streaming/lib/org/apache/kafka/kafka/maven-metadata-local.xml.md5 new file mode 100644 index 0000000000..a4ce5dc9e8 --- /dev/null +++ b/streaming/lib/org/apache/kafka/kafka/maven-metadata-local.xml.md5 @@ -0,0 +1 @@ +e2b9c7c5f6370dd1d21a0aae5e8dcd77 \ No newline at end of file diff --git a/streaming/lib/org/apache/kafka/kafka/maven-metadata-local.xml.sha1 b/streaming/lib/org/apache/kafka/kafka/maven-metadata-local.xml.sha1 new file mode 100644 index 0000000000..b869eaf2a6 --- /dev/null +++ b/streaming/lib/org/apache/kafka/kafka/maven-metadata-local.xml.sha1 @@ -0,0 +1 @@ +2a4341da936b6c07a09383d17ffb185ac558ee91 \ No newline at end of file diff --git a/streaming/pom.xml b/streaming/pom.xml new file mode 100644 index 0000000000..3dae815e1a --- /dev/null +++ b/streaming/pom.xml @@ -0,0 +1,155 @@ + + + 4.0.0 + + org.spark-project + parent + 0.7.0-SNAPSHOT + ../pom.xml + + + org.spark-project + spark-streaming + jar + Spark Project Streaming + http://spark-project.org/ + + + + + lib + file://${project.basedir}/lib + + + + + + org.eclipse.jetty + jetty-server + + + org.codehaus.jackson + jackson-mapper-asl + 1.9.11 + + + org.apache.kafka + kafka + 0.7.2-spark + + + org.apache.flume + flume-ng-sdk + 1.2.0 + + + com.github.sgroschupf + zkclient + 0.1 + + + + org.scalatest + scalatest_${scala.version} + test + + + org.scalacheck + scalacheck_${scala.version} + test + + + com.novocode + junit-interface + test + + + org.slf4j + slf4j-log4j12 + test + + + + target/scala-${scala.version}/classes + 
target/scala-${scala.version}/test-classes + + + org.scalatest + scalatest-maven-plugin + + + + + + + hadoop1 + + + !hadoopVersion + + + + + org.spark-project + spark-core + ${project.version} + hadoop1 + + + org.apache.hadoop + hadoop-core + provided + + + + + + org.apache.maven.plugins + maven-jar-plugin + + hadoop1 + + + + + + + hadoop2 + + + hadoopVersion + 2 + + + + + org.spark-project + spark-core + ${project.version} + hadoop2 + + + org.apache.hadoop + hadoop-core + provided + + + org.apache.hadoop + hadoop-client + provided + + + + + + org.apache.maven.plugins + maven-jar-plugin + + hadoop2 + + + + + + + From 4750907c3dad4c275d0f51937a098ba856098b96 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 20 Jan 2013 21:05:17 -0800 Subject: [PATCH 135/291] Update run script to deal with change to build of REPL shaded JAR --- run | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/run b/run index 060856007f..a094629449 100755 --- a/run +++ b/run @@ -89,9 +89,11 @@ if [ -e "$FWDIR/lib_managed" ]; then CLASSPATH+=":$FWDIR/lib_managed/bundles/*" fi CLASSPATH+=":$REPL_DIR/lib/*" -for jar in `find "$REPL_DIR/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do - CLASSPATH+=":$jar" -done +if [ -e repl-bin/target ]; then + for jar in `find "repl-bin/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do + CLASSPATH+=":$jar" + done +fi CLASSPATH+=":$BAGEL_DIR/target/scala-$SCALA_VERSION/classes" for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do CLASSPATH+=":$jar" From c0b9ceb8c3d56c6d6f6f6b5925c87abad06be646 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 21 Jan 2013 00:23:53 -0800 Subject: [PATCH 136/291] Log remote lifecycle events in Akka for easier debugging --- core/src/main/scala/spark/util/AkkaUtils.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/main/scala/spark/util/AkkaUtils.scala b/core/src/main/scala/spark/util/AkkaUtils.scala index e67cb0336d..fbd0ff46bf 100644 --- a/core/src/main/scala/spark/util/AkkaUtils.scala +++ b/core/src/main/scala/spark/util/AkkaUtils.scala @@ -32,6 +32,7 @@ private[spark] object AkkaUtils { akka.event-handlers = ["akka.event.slf4j.Slf4jEventHandler"] akka.actor.provider = "akka.remote.RemoteActorRefProvider" akka.remote.transport = "akka.remote.netty.NettyRemoteTransport" + akka.remote.log-remote-lifecycle-events = on akka.remote.netty.hostname = "%s" akka.remote.netty.port = %d akka.remote.netty.connection-timeout = %ds From 69a417858bf1627de5220d41afba64853d4bf64d Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Mon, 21 Jan 2013 12:42:11 -0600 Subject: [PATCH 137/291] Also use hadoopConfiguration in newAPI methods. 
--- core/src/main/scala/spark/PairRDDFunctions.scala | 4 ++-- core/src/main/scala/spark/SparkContext.scala | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala index 51c15837c4..1c18736805 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/spark/PairRDDFunctions.scala @@ -494,7 +494,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( keyClass: Class[_], valueClass: Class[_], outputFormatClass: Class[_ <: NewOutputFormat[_, _]]) { - saveAsNewAPIHadoopFile(path, keyClass, valueClass, outputFormatClass, new Configuration) + saveAsNewAPIHadoopFile(path, keyClass, valueClass, outputFormatClass) } /** @@ -506,7 +506,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( keyClass: Class[_], valueClass: Class[_], outputFormatClass: Class[_ <: NewOutputFormat[_, _]], - conf: Configuration) { + conf: Configuration = self.context.hadoopConfiguration) { val job = new NewAPIHadoopJob(conf) job.setOutputKeyClass(keyClass) job.setOutputValueClass(valueClass) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index f6b98c41bc..303e5081a4 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -293,8 +293,7 @@ class SparkContext( path, fm.erasure.asInstanceOf[Class[F]], km.erasure.asInstanceOf[Class[K]], - vm.erasure.asInstanceOf[Class[V]], - new Configuration(hadoopConfiguration)) + vm.erasure.asInstanceOf[Class[V]]) } /** @@ -306,7 +305,7 @@ class SparkContext( fClass: Class[F], kClass: Class[K], vClass: Class[V], - conf: Configuration): RDD[(K, V)] = { + conf: Configuration = hadoopConfiguration): RDD[(K, V)] = { val job = new NewHadoopJob(conf) NewFileInputFormat.addInputPath(job, new Path(path)) val updatedConf = job.getConfiguration @@ -318,7 +317,7 @@ class SparkContext( * and extra configuration options to pass to the input format. 
*/ def newAPIHadoopRDD[K, V, F <: NewInputFormat[K, V]]( - conf: Configuration, + conf: Configuration = hadoopConfiguration, fClass: Class[F], kClass: Class[K], vClass: Class[V]): RDD[(K, V)] = { From f116d6b5c6029c2f96160bd84829a6fe8b73cccf Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 18 Jan 2013 13:24:37 -0800 Subject: [PATCH 138/291] executor can use a different sparkHome from Worker --- core/src/main/scala/spark/deploy/DeployMessage.scala | 4 +++- core/src/main/scala/spark/deploy/JobDescription.scala | 5 ++++- core/src/main/scala/spark/deploy/client/TestClient.scala | 3 ++- core/src/main/scala/spark/deploy/master/Master.scala | 9 +++++---- core/src/main/scala/spark/deploy/worker/Worker.scala | 4 ++-- .../scheduler/cluster/SparkDeploySchedulerBackend.scala | 3 ++- 6 files changed, 18 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/spark/deploy/DeployMessage.scala b/core/src/main/scala/spark/deploy/DeployMessage.scala index 457122745b..7ee3e63429 100644 --- a/core/src/main/scala/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/spark/deploy/DeployMessage.scala @@ -5,6 +5,7 @@ import spark.deploy.master.{WorkerInfo, JobInfo} import spark.deploy.worker.ExecutorRunner import scala.collection.immutable.List import scala.collection.mutable.HashMap +import java.io.File private[spark] sealed trait DeployMessage extends Serializable @@ -42,7 +43,8 @@ private[spark] case class LaunchExecutor( execId: Int, jobDesc: JobDescription, cores: Int, - memory: Int) + memory: Int, + sparkHome: File) extends DeployMessage diff --git a/core/src/main/scala/spark/deploy/JobDescription.scala b/core/src/main/scala/spark/deploy/JobDescription.scala index 20879c5f11..7f8f9af417 100644 --- a/core/src/main/scala/spark/deploy/JobDescription.scala +++ b/core/src/main/scala/spark/deploy/JobDescription.scala @@ -1,10 +1,13 @@ package spark.deploy +import java.io.File + private[spark] class JobDescription( val name: String, val cores: Int, val memoryPerSlave: Int, - val command: Command) + val command: Command, + val sparkHome: File) extends Serializable { val user = System.getProperty("user.name", "") diff --git a/core/src/main/scala/spark/deploy/client/TestClient.scala b/core/src/main/scala/spark/deploy/client/TestClient.scala index 57a7e123b7..dc743b1fbf 100644 --- a/core/src/main/scala/spark/deploy/client/TestClient.scala +++ b/core/src/main/scala/spark/deploy/client/TestClient.scala @@ -3,6 +3,7 @@ package spark.deploy.client import spark.util.AkkaUtils import spark.{Logging, Utils} import spark.deploy.{Command, JobDescription} +import java.io.File private[spark] object TestClient { @@ -25,7 +26,7 @@ private[spark] object TestClient { val url = args(0) val (actorSystem, port) = AkkaUtils.createActorSystem("spark", Utils.localIpAddress, 0) val desc = new JobDescription( - "TestClient", 1, 512, Command("spark.deploy.client.TestExecutor", Seq(), Map())) + "TestClient", 1, 512, Command("spark.deploy.client.TestExecutor", Seq(), Map()), new File("dummy-spark-home")) val listener = new TestListener val client = new Client(actorSystem, url, desc, listener) client.start() diff --git a/core/src/main/scala/spark/deploy/master/Master.scala b/core/src/main/scala/spark/deploy/master/Master.scala index 6ecebe626a..f0bee67159 100644 --- a/core/src/main/scala/spark/deploy/master/Master.scala +++ b/core/src/main/scala/spark/deploy/master/Master.scala @@ -6,6 +6,7 @@ import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientDisconnected, Remote import java.text.SimpleDateFormat import 
java.util.Date +import java.io.File import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} @@ -173,7 +174,7 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor for (pos <- 0 until numUsable) { if (assigned(pos) > 0) { val exec = job.addExecutor(usableWorkers(pos), assigned(pos)) - launchExecutor(usableWorkers(pos), exec) + launchExecutor(usableWorkers(pos), exec, job.desc.sparkHome) job.state = JobState.RUNNING } } @@ -186,7 +187,7 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor val coresToUse = math.min(worker.coresFree, job.coresLeft) if (coresToUse > 0) { val exec = job.addExecutor(worker, coresToUse) - launchExecutor(worker, exec) + launchExecutor(worker, exec, job.desc.sparkHome) job.state = JobState.RUNNING } } @@ -195,10 +196,10 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor } } - def launchExecutor(worker: WorkerInfo, exec: ExecutorInfo) { + def launchExecutor(worker: WorkerInfo, exec: ExecutorInfo, sparkHome: File) { logInfo("Launching executor " + exec.fullId + " on worker " + worker.id) worker.addExecutor(exec) - worker.actor ! LaunchExecutor(exec.job.id, exec.id, exec.job.desc, exec.cores, exec.memory) + worker.actor ! LaunchExecutor(exec.job.id, exec.id, exec.job.desc, exec.cores, exec.memory, sparkHome) exec.job.actor ! ExecutorAdded(exec.id, worker.id, worker.host, exec.cores, exec.memory) } diff --git a/core/src/main/scala/spark/deploy/worker/Worker.scala b/core/src/main/scala/spark/deploy/worker/Worker.scala index 7c9e588ea2..078b2d8037 100644 --- a/core/src/main/scala/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/spark/deploy/worker/Worker.scala @@ -119,10 +119,10 @@ private[spark] class Worker( logError("Worker registration failed: " + message) System.exit(1) - case LaunchExecutor(jobId, execId, jobDesc, cores_, memory_) => + case LaunchExecutor(jobId, execId, jobDesc, cores_, memory_, execSparkHome_) => logInfo("Asked to launch executor %s/%d for %s".format(jobId, execId, jobDesc.name)) val manager = new ExecutorRunner( - jobId, execId, jobDesc, cores_, memory_, self, workerId, ip, sparkHome, workDir) + jobId, execId, jobDesc, cores_, memory_, self, workerId, ip, execSparkHome_, workDir) executors(jobId + "/" + execId) = manager manager.start() coresUsed += cores_ diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index e2301347e5..0dcc2efaca 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -4,6 +4,7 @@ import spark.{Utils, Logging, SparkContext} import spark.deploy.client.{Client, ClientListener} import spark.deploy.{Command, JobDescription} import scala.collection.mutable.HashMap +import java.io.File private[spark] class SparkDeploySchedulerBackend( scheduler: ClusterScheduler, @@ -39,7 +40,7 @@ private[spark] class SparkDeploySchedulerBackend( StandaloneSchedulerBackend.ACTOR_NAME) val args = Seq(masterUrl, "{{SLAVEID}}", "{{HOSTNAME}}", "{{CORES}}") val command = Command("spark.executor.StandaloneExecutorBackend", args, sc.executorEnvs) - val jobDesc = new JobDescription(jobName, maxCores, executorMemory, command) + val jobDesc = new JobDescription(jobName, maxCores, executorMemory, command, new File(sc.sparkHome)) client = new Client(sc.env.actorSystem, master, jobDesc, this) client.start() From 
aae5a920a4db0c31918a65a03ce7d2087826fd65 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 18 Jan 2013 13:28:50 -0800 Subject: [PATCH 139/291] get sparkHome the correct way --- .../spark/scheduler/cluster/SparkDeploySchedulerBackend.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index 0dcc2efaca..08b9d6ff47 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -40,7 +40,7 @@ private[spark] class SparkDeploySchedulerBackend( StandaloneSchedulerBackend.ACTOR_NAME) val args = Seq(masterUrl, "{{SLAVEID}}", "{{HOSTNAME}}", "{{CORES}}") val command = Command("spark.executor.StandaloneExecutorBackend", args, sc.executorEnvs) - val jobDesc = new JobDescription(jobName, maxCores, executorMemory, command, new File(sc.sparkHome)) + val jobDesc = new JobDescription(jobName, maxCores, executorMemory, command, new File(sc.getSparkHome())) client = new Client(sc.env.actorSystem, master, jobDesc, this) client.start() From 5bf73df7f08b17719711a5f05f0b3390b4951272 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Sat, 19 Jan 2013 13:26:15 -0800 Subject: [PATCH 140/291] oops, fix stupid compile error --- .../spark/scheduler/cluster/SparkDeploySchedulerBackend.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index 08b9d6ff47..94886d3941 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -40,7 +40,8 @@ private[spark] class SparkDeploySchedulerBackend( StandaloneSchedulerBackend.ACTOR_NAME) val args = Seq(masterUrl, "{{SLAVEID}}", "{{HOSTNAME}}", "{{CORES}}") val command = Command("spark.executor.StandaloneExecutorBackend", args, sc.executorEnvs) - val jobDesc = new JobDescription(jobName, maxCores, executorMemory, command, new File(sc.getSparkHome())) + val sparkHome = sc.getSparkHome().getOrElse(throw new IllegalArgumentException("must supply spark home for spark standalone")) + val jobDesc = new JobDescription(jobName, maxCores, executorMemory, command, new File(sparkHome)) client = new Client(sc.env.actorSystem, master, jobDesc, this) client.start() From c73107500e0a5b6c5f0b4aba8c4504ee4c2adbaf Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Sun, 20 Jan 2013 21:55:50 -0800 Subject: [PATCH 141/291] send sparkHome as String instead of File over network --- core/src/main/scala/spark/deploy/DeployMessage.scala | 2 +- core/src/main/scala/spark/deploy/master/Master.scala | 2 +- core/src/main/scala/spark/deploy/worker/Worker.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/spark/deploy/DeployMessage.scala b/core/src/main/scala/spark/deploy/DeployMessage.scala index 7ee3e63429..a4081ef89c 100644 --- a/core/src/main/scala/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/spark/deploy/DeployMessage.scala @@ -44,7 +44,7 @@ private[spark] case class LaunchExecutor( jobDesc: JobDescription, cores: Int, memory: Int, - sparkHome: File) + sparkHome: String) extends DeployMessage diff --git a/core/src/main/scala/spark/deploy/master/Master.scala 
b/core/src/main/scala/spark/deploy/master/Master.scala index f0bee67159..1b6f808a51 100644 --- a/core/src/main/scala/spark/deploy/master/Master.scala +++ b/core/src/main/scala/spark/deploy/master/Master.scala @@ -199,7 +199,7 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor def launchExecutor(worker: WorkerInfo, exec: ExecutorInfo, sparkHome: File) { logInfo("Launching executor " + exec.fullId + " on worker " + worker.id) worker.addExecutor(exec) - worker.actor ! LaunchExecutor(exec.job.id, exec.id, exec.job.desc, exec.cores, exec.memory, sparkHome) + worker.actor ! LaunchExecutor(exec.job.id, exec.id, exec.job.desc, exec.cores, exec.memory, sparkHome.getAbsolutePath) exec.job.actor ! ExecutorAdded(exec.id, worker.id, worker.host, exec.cores, exec.memory) } diff --git a/core/src/main/scala/spark/deploy/worker/Worker.scala b/core/src/main/scala/spark/deploy/worker/Worker.scala index 078b2d8037..19bf2be118 100644 --- a/core/src/main/scala/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/spark/deploy/worker/Worker.scala @@ -122,7 +122,7 @@ private[spark] class Worker( case LaunchExecutor(jobId, execId, jobDesc, cores_, memory_, execSparkHome_) => logInfo("Asked to launch executor %s/%d for %s".format(jobId, execId, jobDesc.name)) val manager = new ExecutorRunner( - jobId, execId, jobDesc, cores_, memory_, self, workerId, ip, execSparkHome_, workDir) + jobId, execId, jobDesc, cores_, memory_, self, workerId, ip, new File(execSparkHome_), workDir) executors(jobId + "/" + execId) = manager manager.start() coresUsed += cores_ From fe26acc482f358bf87700f5e80160f7ce558cea7 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Sun, 20 Jan 2013 21:57:44 -0800 Subject: [PATCH 142/291] remove unused imports --- core/src/main/scala/spark/deploy/DeployMessage.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/main/scala/spark/deploy/DeployMessage.scala b/core/src/main/scala/spark/deploy/DeployMessage.scala index a4081ef89c..35f40c6e91 100644 --- a/core/src/main/scala/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/spark/deploy/DeployMessage.scala @@ -4,8 +4,6 @@ import spark.deploy.ExecutorState.ExecutorState import spark.deploy.master.{WorkerInfo, JobInfo} import spark.deploy.worker.ExecutorRunner import scala.collection.immutable.List -import scala.collection.mutable.HashMap -import java.io.File private[spark] sealed trait DeployMessage extends Serializable From a3f571b539ffd126e9f3bc3e9c7bedfcb6f4d2d4 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Mon, 21 Jan 2013 10:52:17 -0800 Subject: [PATCH 143/291] more File -> String changes --- core/src/main/scala/spark/deploy/JobDescription.scala | 4 +--- core/src/main/scala/spark/deploy/client/TestClient.scala | 3 +-- core/src/main/scala/spark/deploy/master/Master.scala | 5 ++--- .../scheduler/cluster/SparkDeploySchedulerBackend.scala | 4 +--- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/spark/deploy/JobDescription.scala b/core/src/main/scala/spark/deploy/JobDescription.scala index 7f8f9af417..7160fc05fc 100644 --- a/core/src/main/scala/spark/deploy/JobDescription.scala +++ b/core/src/main/scala/spark/deploy/JobDescription.scala @@ -1,13 +1,11 @@ package spark.deploy -import java.io.File - private[spark] class JobDescription( val name: String, val cores: Int, val memoryPerSlave: Int, val command: Command, - val sparkHome: File) + val sparkHome: String) extends Serializable { val user = System.getProperty("user.name", "") diff --git 
a/core/src/main/scala/spark/deploy/client/TestClient.scala b/core/src/main/scala/spark/deploy/client/TestClient.scala index dc743b1fbf..8764c400e2 100644 --- a/core/src/main/scala/spark/deploy/client/TestClient.scala +++ b/core/src/main/scala/spark/deploy/client/TestClient.scala @@ -3,7 +3,6 @@ package spark.deploy.client import spark.util.AkkaUtils import spark.{Logging, Utils} import spark.deploy.{Command, JobDescription} -import java.io.File private[spark] object TestClient { @@ -26,7 +25,7 @@ private[spark] object TestClient { val url = args(0) val (actorSystem, port) = AkkaUtils.createActorSystem("spark", Utils.localIpAddress, 0) val desc = new JobDescription( - "TestClient", 1, 512, Command("spark.deploy.client.TestExecutor", Seq(), Map()), new File("dummy-spark-home")) + "TestClient", 1, 512, Command("spark.deploy.client.TestExecutor", Seq(), Map()), "dummy-spark-home") val listener = new TestListener val client = new Client(actorSystem, url, desc, listener) client.start() diff --git a/core/src/main/scala/spark/deploy/master/Master.scala b/core/src/main/scala/spark/deploy/master/Master.scala index 1b6f808a51..2c2cd0231b 100644 --- a/core/src/main/scala/spark/deploy/master/Master.scala +++ b/core/src/main/scala/spark/deploy/master/Master.scala @@ -6,7 +6,6 @@ import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientDisconnected, Remote import java.text.SimpleDateFormat import java.util.Date -import java.io.File import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} @@ -196,10 +195,10 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor } } - def launchExecutor(worker: WorkerInfo, exec: ExecutorInfo, sparkHome: File) { + def launchExecutor(worker: WorkerInfo, exec: ExecutorInfo, sparkHome: String) { logInfo("Launching executor " + exec.fullId + " on worker " + worker.id) worker.addExecutor(exec) - worker.actor ! LaunchExecutor(exec.job.id, exec.id, exec.job.desc, exec.cores, exec.memory, sparkHome.getAbsolutePath) + worker.actor ! LaunchExecutor(exec.job.id, exec.id, exec.job.desc, exec.cores, exec.memory, sparkHome) exec.job.actor ! 
ExecutorAdded(exec.id, worker.id, worker.host, exec.cores, exec.memory) } diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index 94886d3941..a21a5b2f3d 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -3,8 +3,6 @@ package spark.scheduler.cluster import spark.{Utils, Logging, SparkContext} import spark.deploy.client.{Client, ClientListener} import spark.deploy.{Command, JobDescription} -import scala.collection.mutable.HashMap -import java.io.File private[spark] class SparkDeploySchedulerBackend( scheduler: ClusterScheduler, @@ -41,7 +39,7 @@ private[spark] class SparkDeploySchedulerBackend( val args = Seq(masterUrl, "{{SLAVEID}}", "{{HOSTNAME}}", "{{CORES}}") val command = Command("spark.executor.StandaloneExecutorBackend", args, sc.executorEnvs) val sparkHome = sc.getSparkHome().getOrElse(throw new IllegalArgumentException("must supply spark home for spark standalone")) - val jobDesc = new JobDescription(jobName, maxCores, executorMemory, command, new File(sparkHome)) + val jobDesc = new JobDescription(jobName, maxCores, executorMemory, command, sparkHome) client = new Client(sc.env.actorSystem, master, jobDesc, this) client.start() From 4d34c7fc3ecd7a4d035005f84c01e6990c0c345e Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 21 Jan 2013 11:33:48 -0800 Subject: [PATCH 144/291] Fix compile error caused by cherry-pick --- .../spark/scheduler/cluster/SparkDeploySchedulerBackend.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index a21a5b2f3d..4f82cd96dd 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -3,6 +3,7 @@ package spark.scheduler.cluster import spark.{Utils, Logging, SparkContext} import spark.deploy.client.{Client, ClientListener} import spark.deploy.{Command, JobDescription} +import scala.collection.mutable.HashMap private[spark] class SparkDeploySchedulerBackend( scheduler: ClusterScheduler, From a88b44ed3b670633549049e9ccf990ea455e9720 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 21 Jan 2013 11:59:21 -0800 Subject: [PATCH 145/291] Only bind to IPv4 addresses when trying to auto-detect external IP --- core/src/main/scala/spark/Utils.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/Utils.scala b/core/src/main/scala/spark/Utils.scala index b3421df27c..692a3f4050 100644 --- a/core/src/main/scala/spark/Utils.scala +++ b/core/src/main/scala/spark/Utils.scala @@ -1,7 +1,7 @@ package spark import java.io._ -import java.net.{NetworkInterface, InetAddress, URL, URI} +import java.net.{NetworkInterface, InetAddress, Inet4Address, URL, URI} import java.util.{Locale, Random, UUID} import java.util.concurrent.{Executors, ThreadFactory, ThreadPoolExecutor} import org.apache.hadoop.conf.Configuration @@ -251,7 +251,8 @@ private object Utils extends Logging { // Address resolves to something like 127.0.1.1, which happens on Debian; try to find // a better address using the local network interfaces for (ni <- NetworkInterface.getNetworkInterfaces) { - for (addr <- ni.getInetAddresses if 
!addr.isLinkLocalAddress && !addr.isLoopbackAddress) { + for (addr <- ni.getInetAddresses if !addr.isLinkLocalAddress && + !addr.isLoopbackAddress && addr.isInstanceOf[Inet4Address]) { // We've found an address that looks reasonable! logWarning("Your hostname, " + InetAddress.getLocalHost.getHostName + " resolves to" + " a loopback address: " + address.getHostAddress + "; using " + addr.getHostAddress + From 2173f6c7cac877a3b756d63aabf7bdd06a18e6d9 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 21 Jan 2013 13:02:40 -0800 Subject: [PATCH 146/291] Clarify the documentation on env variables for standalone mode --- docs/spark-standalone.md | 43 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index e0ba7c35cb..bf296221b8 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -51,11 +51,11 @@ Finally, the following configuration options can be passed to the master and wor -c CORES, --cores CORES - Number of CPU cores to use (default: all available); only on worker + Total CPU cores to allow Spark jobs to use on the machine (default: all available); only on worker -m MEM, --memory MEM - Amount of memory to use, in a format like 1000M or 2G (default: your machine's total RAM minus 1 GB); only on worker + Total amount of memory to allow Spark jobs to use on the machine, in a format like 1000M or 2G (default: your machine's total RAM minus 1 GB); only on worker -d DIR, --work-dir DIR @@ -66,9 +66,20 @@ Finally, the following configuration options can be passed to the master and wor # Cluster Launch Scripts -To launch a Spark standalone cluster with the deploy scripts, you need to set up two files, `conf/spark-env.sh` and `conf/slaves`. The `conf/spark-env.sh` file lets you specify global settings for the master and slave instances, such as memory, or port numbers to bind to, while `conf/slaves` is a list of slave nodes. The system requires that all the slave machines have the same configuration files, so *copy these files to each machine*. +To launch a Spark standalone cluster with the deploy scripts, you need to create a file called `conf/slaves` in your Spark directory, which should contain the hostnames of all the machines where you would like to start Spark workers, one per line. The master machine must be able to access each of the slave machines via password-less `ssh` (using a private key). For testing, you can just put `localhost` in this file. -In `conf/spark-env.sh`, you can set the following parameters, in addition to the [standard Spark configuration settings](configuration.html): +Once you've set up this fine, you can launch or stop your cluster with the following shell scripts, based on Hadoop's deploy scripts, and available in `SPARK_HOME/bin`: + +- `bin/start-master.sh` - Starts a master instance on the machine the script is executed on. +- `bin/start-slaves.sh` - Starts a slave instance on each machine specified in the `conf/slaves` file. +- `bin/start-all.sh` - Starts both a master and a number of slaves as described above. +- `bin/stop-master.sh` - Stops the master that was started via the `bin/start-master.sh` script. +- `bin/stop-slaves.sh` - Stops the slave instances that were started via `bin/start-slaves.sh`. +- `bin/stop-all.sh` - Stops both the master and the slaves as described above. + +Note that these scripts must be executed on the machine you want to run the Spark master on, not your local machine. 
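Once the cluster is running, a job connects to it by passing the master's `spark://IP:PORT` URL to a `SparkContext`, as described under "Connecting a Job to the Cluster" below. A minimal PySpark sketch (host, port and job name are placeholders):

    from pyspark.context import SparkContext
    # replace the URL below with your own master's spark:// URL
    sc = SparkContext("spark://127.0.0.1:7077", "SimpleJob")
    print sc.parallelize(range(100)).count()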
+ +You can optionally configure the cluster further by setting environment variables in `conf/spark-env.sh`. Create this file by starting with the `conf/spark-env.sh.template`, and _copy it to all your worker machines_ for the settings to take effect. The following settings are available: @@ -88,36 +99,24 @@ In `conf/spark-env.sh`, you can set the following parameters, in addition to the + + + + - + - + - - - -
   <tr><th>Environment Variable</th><th>Meaning</th></tr>
   <tr>
     <td><code>SPARK_WORKER_PORT</code></td>
     <td>Start the Spark worker on a specific port (default: random)</td>
   </tr>
-  <tr>
-    <td><code>SPARK_WORKER_DIR</code></td>
-    <td>Directory to run jobs in, which will include both logs and scratch space (default: SPARK_HOME/work)</td>
-  </tr>
   <tr>
     <td><code>SPARK_WORKER_CORES</code></td>
-    <td>Number of cores to use (default: all available cores)</td>
+    <td>Total number of cores to allow Spark jobs to use on the machine (default: all available cores)</td>
   </tr>
   <tr>
     <td><code>SPARK_WORKER_MEMORY</code></td>
-    <td>How much memory to use, e.g. 1000M, 2G (default: total memory minus 1 GB)</td>
+    <td>Total amount of memory to allow Spark jobs to use on the machine, e.g. 1000M, 2G (default: total memory minus 1 GB); note that each job's individual memory is configured using SPARK_MEM</td>
   </tr>
   <tr>
     <td><code>SPARK_WORKER_WEBUI_PORT</code></td>
     <td>Port for the worker web UI (default: 8081)</td>
   </tr>
+  <tr>
+    <td><code>SPARK_WORKER_DIR</code></td>
+    <td>Directory to run jobs in, which will include both logs and scratch space (default: SPARK_HOME/work)</td>
+  </tr>
    -In `conf/slaves`, include a list of all machines where you would like to start a Spark worker, one per line. The master machine must be able to access each of the slave machines via password-less `ssh` (using a private key). For testing purposes, you can have a single `localhost` entry in the slaves file. - -Once you've set up these configuration files, you can launch or stop your cluster with the following shell scripts, based on Hadoop's deploy scripts, and available in `SPARK_HOME/bin`: - -- `bin/start-master.sh` - Starts a master instance on the machine the script is executed on. -- `bin/start-slaves.sh` - Starts a slave instance on each machine specified in the `conf/slaves` file. -- `bin/start-all.sh` - Starts both a master and a number of slaves as described above. -- `bin/stop-master.sh` - Stops the master that was started via the `bin/start-master.sh` script. -- `bin/stop-slaves.sh` - Stops the slave instances that were started via `bin/start-slaves.sh`. -- `bin/stop-all.sh` - Stops both the master and the slaves as described above. - -Note that the scripts must be executed on the machine you want to run the Spark master on, not your local machine. # Connecting a Job to the Cluster From 76d7c0ce2bd9c4d5782fec320279e0a011230625 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 21 Jan 2013 13:10:02 -0800 Subject: [PATCH 147/291] Add more Akka settings to docs --- docs/configuration.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/configuration.md b/docs/configuration.md index 87cb4a6797..036a0df480 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -197,6 +197,15 @@ Apart from these, the following properties are also available, and may be useful poor data locality, but the default generally works well. + + spark.akka.frameSize + 10 + + Maximum message size to allow in "control plane" communication (for serialized tasks and task + results), in MB. Increase this if your tasks need to send back large results to the master + (e.g. using collect() on a large dataset). + + spark.akka.threads 4 @@ -205,6 +214,13 @@ Apart from these, the following properties are also available, and may be useful when the master has a lot of CPU cores. + + spark.akka.timeout + 20 + + Communication timeout between Spark nodes. + + spark.master.host (local hostname) From ffd1623595cdce4080ad1e4e676e65898ebdd6dd Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Mon, 21 Jan 2013 15:55:46 -0600 Subject: [PATCH 148/291] Minor cleanup. 
--- core/src/main/scala/spark/Accumulators.scala | 3 +-- core/src/main/scala/spark/Logging.scala | 3 +-- .../main/scala/spark/ParallelCollection.scala | 15 +++++---------- core/src/main/scala/spark/TaskContext.scala | 3 +-- core/src/main/scala/spark/rdd/BlockRDD.scala | 6 ++---- core/src/main/scala/spark/rdd/CartesianRDD.scala | 3 +-- core/src/main/scala/spark/rdd/CoGroupedRDD.scala | 6 ++---- core/src/main/scala/spark/rdd/NewHadoopRDD.scala | 6 ++---- core/src/main/scala/spark/rdd/SampledRDD.scala | 5 ++--- core/src/main/scala/spark/rdd/ShuffledRDD.scala | 3 +-- core/src/main/scala/spark/rdd/UnionRDD.scala | 3 +-- core/src/main/scala/spark/rdd/ZippedRDD.scala | 3 +-- .../spark/scheduler/local/LocalScheduler.scala | 4 ++-- .../mesos/CoarseMesosSchedulerBackend.scala | 16 ++++++---------- .../scheduler/mesos/MesosSchedulerBackend.scala | 10 +++------- core/src/test/scala/spark/FileServerSuite.scala | 4 ++-- 16 files changed, 33 insertions(+), 60 deletions(-) diff --git a/core/src/main/scala/spark/Accumulators.scala b/core/src/main/scala/spark/Accumulators.scala index b644aba5f8..57c6df35be 100644 --- a/core/src/main/scala/spark/Accumulators.scala +++ b/core/src/main/scala/spark/Accumulators.scala @@ -25,8 +25,7 @@ class Accumulable[R, T] ( extends Serializable { val id = Accumulators.newId - @transient - private var value_ = initialValue // Current value on master + @transient private var value_ = initialValue // Current value on master val zero = param.zero(initialValue) // Zero value to be passed to workers var deserialized = false diff --git a/core/src/main/scala/spark/Logging.scala b/core/src/main/scala/spark/Logging.scala index 90bae26202..7c1c1bb144 100644 --- a/core/src/main/scala/spark/Logging.scala +++ b/core/src/main/scala/spark/Logging.scala @@ -11,8 +11,7 @@ import org.slf4j.LoggerFactory trait Logging { // Make the log field transient so that objects with Logging can // be serialized and used on another machine - @transient - private var log_ : Logger = null + @transient private var log_ : Logger = null // Method to get or create the logger for this object protected def log: Logger = { diff --git a/core/src/main/scala/spark/ParallelCollection.scala b/core/src/main/scala/spark/ParallelCollection.scala index ede933c9e9..ad23e5bec8 100644 --- a/core/src/main/scala/spark/ParallelCollection.scala +++ b/core/src/main/scala/spark/ParallelCollection.scala @@ -23,32 +23,28 @@ private[spark] class ParallelCollectionSplit[T: ClassManifest]( } private[spark] class ParallelCollection[T: ClassManifest]( - @transient sc : SparkContext, + @transient sc: SparkContext, @transient data: Seq[T], numSlices: Int, - locationPrefs : Map[Int,Seq[String]]) + locationPrefs: Map[Int,Seq[String]]) extends RDD[T](sc, Nil) { // TODO: Right now, each split sends along its full data, even if later down the RDD chain it gets // cached. It might be worthwhile to write the data to a file in the DFS and read it in the split // instead. // UPDATE: A parallel collection can be checkpointed to HDFS, which achieves this goal. 
- @transient - var splits_ : Array[Split] = { + @transient var splits_ : Array[Split] = { val slices = ParallelCollection.slice(data, numSlices).toArray slices.indices.map(i => new ParallelCollectionSplit(id, i, slices(i))).toArray } - override def getSplits = splits_.asInstanceOf[Array[Split]] + override def getSplits = splits_ override def compute(s: Split, context: TaskContext) = s.asInstanceOf[ParallelCollectionSplit[T]].iterator override def getPreferredLocations(s: Split): Seq[String] = { - locationPrefs.get(s.index) match { - case Some(s) => s - case _ => Nil - } + locationPrefs.get(s.index) getOrElse Nil } override def clearDependencies() { @@ -56,7 +52,6 @@ private[spark] class ParallelCollection[T: ClassManifest]( } } - private object ParallelCollection { /** * Slice a collection into numSlices sub-collections. One extra thing we do here is to treat Range diff --git a/core/src/main/scala/spark/TaskContext.scala b/core/src/main/scala/spark/TaskContext.scala index d2746b26b3..eab85f85a2 100644 --- a/core/src/main/scala/spark/TaskContext.scala +++ b/core/src/main/scala/spark/TaskContext.scala @@ -5,8 +5,7 @@ import scala.collection.mutable.ArrayBuffer class TaskContext(val stageId: Int, val splitId: Int, val attemptId: Long) extends Serializable { - @transient - val onCompleteCallbacks = new ArrayBuffer[() => Unit] + @transient val onCompleteCallbacks = new ArrayBuffer[() => Unit] // Add a callback function to be executed on task completion. An example use // is for HadoopRDD to register a callback to close the input stream. diff --git a/core/src/main/scala/spark/rdd/BlockRDD.scala b/core/src/main/scala/spark/rdd/BlockRDD.scala index b1095a52b4..2c022f88e0 100644 --- a/core/src/main/scala/spark/rdd/BlockRDD.scala +++ b/core/src/main/scala/spark/rdd/BlockRDD.scala @@ -11,13 +11,11 @@ private[spark] class BlockRDD[T: ClassManifest](sc: SparkContext, @transient blockIds: Array[String]) extends RDD[T](sc, Nil) { - @transient - var splits_ : Array[Split] = (0 until blockIds.size).map(i => { + @transient var splits_ : Array[Split] = (0 until blockIds.size).map(i => { new BlockRDDSplit(blockIds(i), i).asInstanceOf[Split] }).toArray - @transient - lazy val locations_ = { + @transient lazy val locations_ = { val blockManager = SparkEnv.get.blockManager /*val locations = blockIds.map(id => blockManager.getLocations(id))*/ val locations = blockManager.getLocations(blockIds) diff --git a/core/src/main/scala/spark/rdd/CartesianRDD.scala b/core/src/main/scala/spark/rdd/CartesianRDD.scala index 79e7c24e7c..453d410ad4 100644 --- a/core/src/main/scala/spark/rdd/CartesianRDD.scala +++ b/core/src/main/scala/spark/rdd/CartesianRDD.scala @@ -35,8 +35,7 @@ class CartesianRDD[T: ClassManifest, U:ClassManifest]( val numSplitsInRdd2 = rdd2.splits.size - @transient - var splits_ = { + @transient var splits_ = { // create the cross product split val array = new Array[Split](rdd1.splits.size * rdd2.splits.size) for (s1 <- rdd1.splits; s2 <- rdd2.splits) { diff --git a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/spark/rdd/CoGroupedRDD.scala index 1d528be2aa..8fafd27bb6 100644 --- a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoGroupedRDD.scala @@ -45,8 +45,7 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[(_, _)]], part: Partitioner) val aggr = new CoGroupAggregator - @transient - var deps_ = { + @transient var deps_ = { val deps = new ArrayBuffer[Dependency[_]] for ((rdd, index) <- rdds.zipWithIndex) { if (rdd.partitioner == Some(part)) 
{ @@ -63,8 +62,7 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[(_, _)]], part: Partitioner) override def getDependencies = deps_ - @transient - var splits_ : Array[Split] = { + @transient var splits_ : Array[Split] = { val array = new Array[Split](part.numPartitions) for (i <- 0 until array.size) { array(i) = new CoGroupSplit(i, rdds.zipWithIndex.map { case (r, j) => diff --git a/core/src/main/scala/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/spark/rdd/NewHadoopRDD.scala index bb22db073c..c3b155fcbd 100644 --- a/core/src/main/scala/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/spark/rdd/NewHadoopRDD.scala @@ -37,11 +37,9 @@ class NewHadoopRDD[K, V]( formatter.format(new Date()) } - @transient - private val jobId = new JobID(jobtrackerId, id) + @transient private val jobId = new JobID(jobtrackerId, id) - @transient - private val splits_ : Array[Split] = { + @transient private val splits_ : Array[Split] = { val inputFormat = inputFormatClass.newInstance val jobContext = newJobContext(conf, jobId) val rawSplits = inputFormat.getSplits(jobContext).toArray diff --git a/core/src/main/scala/spark/rdd/SampledRDD.scala b/core/src/main/scala/spark/rdd/SampledRDD.scala index 1bc9c96112..e24ad23b21 100644 --- a/core/src/main/scala/spark/rdd/SampledRDD.scala +++ b/core/src/main/scala/spark/rdd/SampledRDD.scala @@ -19,13 +19,12 @@ class SampledRDD[T: ClassManifest]( seed: Int) extends RDD[T](prev) { - @transient - var splits_ : Array[Split] = { + @transient var splits_ : Array[Split] = { val rg = new Random(seed) firstParent[T].splits.map(x => new SampledRDDSplit(x, rg.nextInt)) } - override def getSplits = splits_.asInstanceOf[Array[Split]] + override def getSplits = splits_ override def getPreferredLocations(split: Split) = firstParent[T].preferredLocations(split.asInstanceOf[SampledRDDSplit].prev) diff --git a/core/src/main/scala/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/spark/rdd/ShuffledRDD.scala index 1b219473e0..28ff19876d 100644 --- a/core/src/main/scala/spark/rdd/ShuffledRDD.scala +++ b/core/src/main/scala/spark/rdd/ShuffledRDD.scala @@ -22,8 +22,7 @@ class ShuffledRDD[K, V]( override val partitioner = Some(part) - @transient - var splits_ = Array.tabulate[Split](part.numPartitions)(i => new ShuffledRDDSplit(i)) + @transient var splits_ = Array.tabulate[Split](part.numPartitions)(i => new ShuffledRDDSplit(i)) override def getSplits = splits_ diff --git a/core/src/main/scala/spark/rdd/UnionRDD.scala b/core/src/main/scala/spark/rdd/UnionRDD.scala index 24a085df02..82f0a44ecd 100644 --- a/core/src/main/scala/spark/rdd/UnionRDD.scala +++ b/core/src/main/scala/spark/rdd/UnionRDD.scala @@ -28,8 +28,7 @@ class UnionRDD[T: ClassManifest]( @transient var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil, so the dependencies_ var does not refer to parent RDDs - @transient - var splits_ : Array[Split] = { + @transient var splits_ : Array[Split] = { val array = new Array[Split](rdds.map(_.splits.size).sum) var pos = 0 for (rdd <- rdds; split <- rdd.splits) { diff --git a/core/src/main/scala/spark/rdd/ZippedRDD.scala b/core/src/main/scala/spark/rdd/ZippedRDD.scala index 16e6cc0f1b..d950b06c85 100644 --- a/core/src/main/scala/spark/rdd/ZippedRDD.scala +++ b/core/src/main/scala/spark/rdd/ZippedRDD.scala @@ -34,8 +34,7 @@ class ZippedRDD[T: ClassManifest, U: ClassManifest]( // TODO: FIX THIS. 
- @transient - var splits_ : Array[Split] = { + @transient var splits_ : Array[Split] = { if (rdd1.splits.size != rdd2.splits.size) { throw new IllegalArgumentException("Can't zip RDDs with unequal numbers of partitions") } diff --git a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala b/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala index dff550036d..21d255debd 100644 --- a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala +++ b/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala @@ -19,8 +19,8 @@ private[spark] class LocalScheduler(threads: Int, maxFailures: Int, sc: SparkCon extends TaskScheduler with Logging { - var attemptId = new AtomicInteger(0) - var threadPool = Executors.newFixedThreadPool(threads, DaemonThreadFactory) + val attemptId = new AtomicInteger(0) + val threadPool = Executors.newFixedThreadPool(threads, DaemonThreadFactory) val env = SparkEnv.get var listener: TaskSchedulerListener = null diff --git a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala index c45c7df69c..014906b028 100644 --- a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala @@ -64,13 +64,9 @@ private[spark] class CoarseMesosSchedulerBackend( val taskIdToSlaveId = new HashMap[Int, String] val failuresBySlaveId = new HashMap[String, Int] // How many times tasks on each slave failed - val sparkHome = sc.getSparkHome() match { - case Some(path) => - path - case None => - throw new SparkException("Spark home is not set; set it through the spark.home system " + - "property, the SPARK_HOME environment variable or the SparkContext constructor") - } + val sparkHome = sc.getSparkHome().getOrElse(throw new SparkException( + "Spark home is not set; set it through the spark.home system " + + "property, the SPARK_HOME environment variable or the SparkContext constructor")) val extraCoresPerSlave = System.getProperty("spark.mesos.extra.cores", "0").toInt @@ -184,7 +180,7 @@ private[spark] class CoarseMesosSchedulerBackend( } /** Helper function to pull out a resource from a Mesos Resources protobuf */ - def getResource(res: JList[Resource], name: String): Double = { + private def getResource(res: JList[Resource], name: String): Double = { for (r <- res if r.getName == name) { return r.getScalar.getValue } @@ -193,7 +189,7 @@ private[spark] class CoarseMesosSchedulerBackend( } /** Build a Mesos resource protobuf object */ - def createResource(resourceName: String, quantity: Double): Protos.Resource = { + private def createResource(resourceName: String, quantity: Double): Protos.Resource = { Resource.newBuilder() .setName(resourceName) .setType(Value.Type.SCALAR) @@ -202,7 +198,7 @@ private[spark] class CoarseMesosSchedulerBackend( } /** Check whether a Mesos task state represents a finished task */ - def isFinished(state: MesosTaskState) = { + private def isFinished(state: MesosTaskState) = { state == MesosTaskState.TASK_FINISHED || state == MesosTaskState.TASK_FAILED || state == MesosTaskState.TASK_KILLED || diff --git a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala index 8c7a1dfbc0..2989e31f5e 100644 --- a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala @@ -76,13 
+76,9 @@ private[spark] class MesosSchedulerBackend( } def createExecutorInfo(): ExecutorInfo = { - val sparkHome = sc.getSparkHome() match { - case Some(path) => - path - case None => - throw new SparkException("Spark home is not set; set it through the spark.home system " + - "property, the SPARK_HOME environment variable or the SparkContext constructor") - } + val sparkHome = sc.getSparkHome().getOrElse(throw new SparkException( + "Spark home is not set; set it through the spark.home system " + + "property, the SPARK_HOME environment variable or the SparkContext constructor")) val execScript = new File(sparkHome, "spark-executor").getCanonicalPath val environment = Environment.newBuilder() sc.executorEnvs.foreach { case (key, value) => diff --git a/core/src/test/scala/spark/FileServerSuite.scala b/core/src/test/scala/spark/FileServerSuite.scala index b4283d9604..fe964bd893 100644 --- a/core/src/test/scala/spark/FileServerSuite.scala +++ b/core/src/test/scala/spark/FileServerSuite.scala @@ -9,8 +9,8 @@ import SparkContext._ class FileServerSuite extends FunSuite with BeforeAndAfter { @transient var sc: SparkContext = _ - @transient var tmpFile : File = _ - @transient var testJarFile : File = _ + @transient var tmpFile: File = _ + @transient var testJarFile: File = _ before { // Create a sample text file From e5ca2413352510297092384eda73049ad601fd8a Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Mon, 21 Jan 2013 16:06:58 -0600 Subject: [PATCH 149/291] Move JavaAPISuite into spark.streaming. --- streaming/src/test/java/{ => spark/streaming}/JavaAPISuite.java | 0 streaming/src/test/java/{ => spark/streaming}/JavaTestUtils.scala | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename streaming/src/test/java/{ => spark/streaming}/JavaAPISuite.java (100%) rename streaming/src/test/java/{ => spark/streaming}/JavaTestUtils.scala (100%) diff --git a/streaming/src/test/java/JavaAPISuite.java b/streaming/src/test/java/spark/streaming/JavaAPISuite.java similarity index 100% rename from streaming/src/test/java/JavaAPISuite.java rename to streaming/src/test/java/spark/streaming/JavaAPISuite.java diff --git a/streaming/src/test/java/JavaTestUtils.scala b/streaming/src/test/java/spark/streaming/JavaTestUtils.scala similarity index 100% rename from streaming/src/test/java/JavaTestUtils.scala rename to streaming/src/test/java/spark/streaming/JavaTestUtils.scala From ef711902c1f42db14c8ddd524195f0a9efb56e65 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 21 Jan 2013 16:42:24 -0800 Subject: [PATCH 150/291] Don't download files to master's working directory. This should avoid exceptions caused by existing files with different contents. I also removed some unused code. 
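The patch whose message appears above (its diff follows) moves addFile downloads out of the executor's working directory and adds a SparkFiles class for resolving their location. A minimal sketch of the usage pattern its updated docs describe, assuming a local file /tmp/lookup.txt exists; the path, master URL and job name are placeholders:

import scala.io.Source
import spark.{SparkContext, SparkFiles}

object AddFileExample {
  def main(args: Array[String]) {
    val sc = new SparkContext("local[2]", "AddFileExample")
    sc.addFile("/tmp/lookup.txt")                // shipped to every node with the job
    val sizes = sc.parallelize(1 to 4, 2).map { i =>
      // Resolve the per-job download location instead of assuming the
      // current working directory, as the patch's updated docs describe.
      val path = SparkFiles.get("lookup.txt")
      Source.fromFile(path).getLines().size
    }.collect()
    println(sizes.mkString(","))
    sc.stop()
  }
}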
--- .../src/main/scala/spark/HttpFileServer.scala | 8 ++-- core/src/main/scala/spark/SparkContext.scala | 7 ++-- core/src/main/scala/spark/SparkEnv.scala | 20 ++++++---- core/src/main/scala/spark/SparkFiles.java | 25 ++++++++++++ core/src/main/scala/spark/Utils.scala | 16 +------- .../spark/api/java/JavaSparkContext.scala | 5 ++- .../scala/spark/api/python/PythonRDD.scala | 2 + .../spark/deploy/worker/ExecutorRunner.scala | 5 --- .../main/scala/spark/executor/Executor.scala | 6 +-- .../scheduler/local/LocalScheduler.scala | 6 +-- .../test/scala/spark/FileServerSuite.scala | 9 +++-- python/pyspark/__init__.py | 5 ++- python/pyspark/context.py | 40 +++++++++++++++++-- python/pyspark/files.py | 24 +++++++++++ python/pyspark/worker.py | 3 ++ python/run-tests | 3 ++ 16 files changed, 133 insertions(+), 51 deletions(-) create mode 100644 core/src/main/scala/spark/SparkFiles.java create mode 100644 python/pyspark/files.py diff --git a/core/src/main/scala/spark/HttpFileServer.scala b/core/src/main/scala/spark/HttpFileServer.scala index 659d17718f..00901d95e2 100644 --- a/core/src/main/scala/spark/HttpFileServer.scala +++ b/core/src/main/scala/spark/HttpFileServer.scala @@ -1,9 +1,7 @@ package spark -import java.io.{File, PrintWriter} -import java.net.URL -import scala.collection.mutable.HashMap -import org.apache.hadoop.fs.FileUtil +import java.io.{File} +import com.google.common.io.Files private[spark] class HttpFileServer extends Logging { @@ -40,7 +38,7 @@ private[spark] class HttpFileServer extends Logging { } def addFileToDir(file: File, dir: File) : String = { - Utils.copyFile(file, new File(dir, file.getName)) + Files.copy(file, new File(dir, file.getName)) return dir + "/" + file.getName } diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 8b6f4b3b7d..2eeca66ed6 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -439,9 +439,10 @@ class SparkContext( def broadcast[T](value: T) = env.broadcastManager.newBroadcast[T](value, isLocal) /** - * Add a file to be downloaded into the working directory of this Spark job on every node. + * Add a file to be downloaded with this Spark job on every node. * The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported - * filesystems), or an HTTP, HTTPS or FTP URI. + * filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, + * use `SparkFiles.get(path)` to find its download location. */ def addFile(path: String) { val uri = new URI(path) @@ -454,7 +455,7 @@ class SparkContext( // Fetch the file locally in case a job is executed locally. // Jobs that run through LocalScheduler will already fetch the required dependencies, // but jobs run in DAGScheduler.runLocally() will not so we must fetch the files here. 
- Utils.fetchFile(path, new File(".")) + Utils.fetchFile(path, new File(SparkFiles.getRootDirectory)) logInfo("Added file " + path + " at " + key + " with timestamp " + addedFiles(key)) } diff --git a/core/src/main/scala/spark/SparkEnv.scala b/core/src/main/scala/spark/SparkEnv.scala index 41441720a7..6b44e29f4c 100644 --- a/core/src/main/scala/spark/SparkEnv.scala +++ b/core/src/main/scala/spark/SparkEnv.scala @@ -28,14 +28,10 @@ class SparkEnv ( val broadcastManager: BroadcastManager, val blockManager: BlockManager, val connectionManager: ConnectionManager, - val httpFileServer: HttpFileServer + val httpFileServer: HttpFileServer, + val sparkFilesDir: String ) { - /** No-parameter constructor for unit tests. */ - def this() = { - this(null, new JavaSerializer, new JavaSerializer, null, null, null, null, null, null, null) - } - def stop() { httpFileServer.stop() mapOutputTracker.stop() @@ -112,6 +108,15 @@ object SparkEnv extends Logging { httpFileServer.initialize() System.setProperty("spark.fileserver.uri", httpFileServer.serverUri) + // Set the sparkFiles directory, used when downloading dependencies. In local mode, + // this is a temporary directory; in distributed mode, this is the executor's current working + // directory. + val sparkFilesDir: String = if (isMaster) { + Utils.createTempDir().getAbsolutePath + } else { + "." + } + // Warn about deprecated spark.cache.class property if (System.getProperty("spark.cache.class") != null) { logWarning("The spark.cache.class property is no longer being used! Specify storage " + @@ -128,6 +133,7 @@ object SparkEnv extends Logging { broadcastManager, blockManager, connectionManager, - httpFileServer) + httpFileServer, + sparkFilesDir) } } diff --git a/core/src/main/scala/spark/SparkFiles.java b/core/src/main/scala/spark/SparkFiles.java new file mode 100644 index 0000000000..b59d8ce93f --- /dev/null +++ b/core/src/main/scala/spark/SparkFiles.java @@ -0,0 +1,25 @@ +package spark; + +import java.io.File; + +/** + * Resolves paths to files added through `addFile(). + */ +public class SparkFiles { + + private SparkFiles() {} + + /** + * Get the absolute path of a file added through `addFile()`. + */ + public static String get(String filename) { + return new File(getRootDirectory(), filename).getAbsolutePath(); + } + + /** + * Get the root directory that contains files added through `addFile()`. + */ + public static String getRootDirectory() { + return SparkEnv.get().sparkFilesDir(); + } +} \ No newline at end of file diff --git a/core/src/main/scala/spark/Utils.scala b/core/src/main/scala/spark/Utils.scala index 692a3f4050..827c8bd81e 100644 --- a/core/src/main/scala/spark/Utils.scala +++ b/core/src/main/scala/spark/Utils.scala @@ -111,20 +111,6 @@ private object Utils extends Logging { } } - /** Copy a file on the local file system */ - def copyFile(source: File, dest: File) { - val in = new FileInputStream(source) - val out = new FileOutputStream(dest) - copyStream(in, out, true) - } - - /** Download a file from a given URL to the local filesystem */ - def downloadFile(url: URL, localPath: String) { - val in = url.openStream() - val out = new FileOutputStream(localPath) - Utils.copyStream(in, out, true) - } - /** * Download a file requested by the executor. Supports fetching the file in a variety of ways, * including HTTP, HDFS and files on a standard filesystem, based on the URL parameter. 
@@ -201,7 +187,7 @@ private object Utils extends Logging { Utils.execute(Seq("tar", "-xf", filename), targetDir) } // Make the file executable - That's necessary for scripts - FileUtil.chmod(filename, "a+x") + FileUtil.chmod(targetFile.getAbsolutePath, "a+x") } /** diff --git a/core/src/main/scala/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/spark/api/java/JavaSparkContext.scala index 16c122c584..50b8970cd8 100644 --- a/core/src/main/scala/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/spark/api/java/JavaSparkContext.scala @@ -323,9 +323,10 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork def getSparkHome(): Option[String] = sc.getSparkHome() /** - * Add a file to be downloaded into the working directory of this Spark job on every node. + * Add a file to be downloaded with this Spark job on every node. * The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported - * filesystems), or an HTTP, HTTPS or FTP URI. + * filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, + * use `SparkFiles.get(path)` to find its download location. */ def addFile(path: String) { sc.addFile(path) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 5526406a20..f43a152ca7 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -67,6 +67,8 @@ private[spark] class PythonRDD[T: ClassManifest]( val dOut = new DataOutputStream(proc.getOutputStream) // Split index dOut.writeInt(split.index) + // sparkFilesDir + PythonRDD.writeAsPickle(SparkFiles.getRootDirectory, dOut) // Broadcast variables dOut.writeInt(broadcastVars.length) for (broadcast <- broadcastVars) { diff --git a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala index beceb55ecd..0d1fe2a6b4 100644 --- a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala @@ -106,11 +106,6 @@ private[spark] class ExecutorRunner( throw new IOException("Failed to create directory " + executorDir) } - // Download the files it depends on into it (disabled for now) - //for (url <- jobDesc.fileUrls) { - // fetchFile(url, executorDir) - //} - // Launch the process val command = buildCommandSeq() val builder = new ProcessBuilder(command: _*).directory(executorDir) diff --git a/core/src/main/scala/spark/executor/Executor.scala b/core/src/main/scala/spark/executor/Executor.scala index 2552958d27..70629f6003 100644 --- a/core/src/main/scala/spark/executor/Executor.scala +++ b/core/src/main/scala/spark/executor/Executor.scala @@ -162,16 +162,16 @@ private[spark] class Executor extends Logging { // Fetch missing dependencies for ((name, timestamp) <- newFiles if currentFiles.getOrElse(name, -1L) < timestamp) { logInfo("Fetching " + name + " with timestamp " + timestamp) - Utils.fetchFile(name, new File(".")) + Utils.fetchFile(name, new File(SparkFiles.getRootDirectory)) currentFiles(name) = timestamp } for ((name, timestamp) <- newJars if currentJars.getOrElse(name, -1L) < timestamp) { logInfo("Fetching " + name + " with timestamp " + timestamp) - Utils.fetchFile(name, new File(".")) + Utils.fetchFile(name, new File(SparkFiles.getRootDirectory)) currentJars(name) = timestamp // Add it to our class loader val localName = name.split("/").last - val url = new File(".", 
localName).toURI.toURL + val url = new File(SparkFiles.getRootDirectory, localName).toURI.toURL if (!urlClassLoader.getURLs.contains(url)) { logInfo("Adding " + url + " to class loader") urlClassLoader.addURL(url) diff --git a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala b/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala index dff550036d..4451d314e6 100644 --- a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala +++ b/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala @@ -116,16 +116,16 @@ private[spark] class LocalScheduler(threads: Int, maxFailures: Int, sc: SparkCon // Fetch missing dependencies for ((name, timestamp) <- newFiles if currentFiles.getOrElse(name, -1L) < timestamp) { logInfo("Fetching " + name + " with timestamp " + timestamp) - Utils.fetchFile(name, new File(".")) + Utils.fetchFile(name, new File(SparkFiles.getRootDirectory)) currentFiles(name) = timestamp } for ((name, timestamp) <- newJars if currentJars.getOrElse(name, -1L) < timestamp) { logInfo("Fetching " + name + " with timestamp " + timestamp) - Utils.fetchFile(name, new File(".")) + Utils.fetchFile(name, new File(SparkFiles.getRootDirectory)) currentJars(name) = timestamp // Add it to our class loader val localName = name.split("/").last - val url = new File(".", localName).toURI.toURL + val url = new File(SparkFiles.getRootDirectory, localName).toURI.toURL if (!classLoader.getURLs.contains(url)) { logInfo("Adding " + url + " to class loader") classLoader.addURL(url) diff --git a/core/src/test/scala/spark/FileServerSuite.scala b/core/src/test/scala/spark/FileServerSuite.scala index b4283d9604..528c6b8424 100644 --- a/core/src/test/scala/spark/FileServerSuite.scala +++ b/core/src/test/scala/spark/FileServerSuite.scala @@ -40,7 +40,8 @@ class FileServerSuite extends FunSuite with BeforeAndAfter { sc.addFile(tmpFile.toString) val testData = Array((1,1), (1,1), (2,1), (3,5), (2,2), (3,0)) val result = sc.parallelize(testData).reduceByKey { - val in = new BufferedReader(new FileReader("FileServerSuite.txt")) + val path = SparkFiles.get("FileServerSuite.txt") + val in = new BufferedReader(new FileReader(path)) val fileVal = in.readLine().toInt in.close() _ * fileVal + _ * fileVal @@ -54,7 +55,8 @@ class FileServerSuite extends FunSuite with BeforeAndAfter { sc.addFile((new File(tmpFile.toString)).toURL.toString) val testData = Array((1,1), (1,1), (2,1), (3,5), (2,2), (3,0)) val result = sc.parallelize(testData).reduceByKey { - val in = new BufferedReader(new FileReader("FileServerSuite.txt")) + val path = SparkFiles.get("FileServerSuite.txt") + val in = new BufferedReader(new FileReader(path)) val fileVal = in.readLine().toInt in.close() _ * fileVal + _ * fileVal @@ -83,7 +85,8 @@ class FileServerSuite extends FunSuite with BeforeAndAfter { sc.addFile(tmpFile.toString) val testData = Array((1,1), (1,1), (2,1), (3,5), (2,2), (3,0)) val result = sc.parallelize(testData).reduceByKey { - val in = new BufferedReader(new FileReader("FileServerSuite.txt")) + val path = SparkFiles.get("FileServerSuite.txt") + val in = new BufferedReader(new FileReader(path)) val fileVal = in.readLine().toInt in.close() _ * fileVal + _ * fileVal diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index 00666bc0a3..3e8bca62f0 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -11,6 +11,8 @@ Public classes: A broadcast variable that gets reused across tasks. - L{Accumulator} An "add-only" shared variable that tasks can only add values to. 
+ - L{SparkFiles} + Access files shipped with jobs. """ import sys import os @@ -19,6 +21,7 @@ sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j0.7.eg from pyspark.context import SparkContext from pyspark.rdd import RDD +from pyspark.files import SparkFiles -__all__ = ["SparkContext", "RDD"] +__all__ = ["SparkContext", "RDD", "SparkFiles"] diff --git a/python/pyspark/context.py b/python/pyspark/context.py index dcbed37270..ec0cc7c2f9 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -1,5 +1,7 @@ import os import atexit +import shutil +import tempfile from tempfile import NamedTemporaryFile from pyspark import accumulators @@ -173,10 +175,26 @@ class SparkContext(object): def addFile(self, path): """ - Add a file to be downloaded into the working directory of this Spark - job on every node. The C{path} passed can be either a local file, - a file in HDFS (or other Hadoop-supported filesystems), or an HTTP, - HTTPS or FTP URI. + Add a file to be downloaded with this Spark job on every node. + The C{path} passed can be either a local file, a file in HDFS + (or other Hadoop-supported filesystems), or an HTTP, HTTPS or + FTP URI. + + To access the file in Spark jobs, use + L{SparkFiles.get(path)} to find its + download location. + + >>> from pyspark import SparkFiles + >>> path = os.path.join(tempdir, "test.txt") + >>> with open(path, "w") as testFile: + ... testFile.write("100") + >>> sc.addFile(path) + >>> def func(iterator): + ... with open(SparkFiles.get("test.txt")) as testFile: + ... fileVal = int(testFile.readline()) + ... return [x * 100 for x in iterator] + >>> sc.parallelize([1, 2, 3, 4]).mapPartitions(func).collect() + [100, 200, 300, 400] """ self._jsc.sc().addFile(path) @@ -211,3 +229,17 @@ class SparkContext(object): accidental overriding of checkpoint files in the existing directory. """ self._jsc.sc().setCheckpointDir(dirName, useExisting) + + +def _test(): + import doctest + globs = globals().copy() + globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) + globs['tempdir'] = tempfile.mkdtemp() + atexit.register(lambda: shutil.rmtree(globs['tempdir'])) + doctest.testmod(globs=globs) + globs['sc'].stop() + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/files.py b/python/pyspark/files.py new file mode 100644 index 0000000000..de1334f046 --- /dev/null +++ b/python/pyspark/files.py @@ -0,0 +1,24 @@ +import os + + +class SparkFiles(object): + """ + Resolves paths to files added through + L{addFile()}. + + SparkFiles contains only classmethods; users should not create SparkFiles + instances. + """ + + _root_directory = None + + def __init__(self): + raise NotImplementedError("Do not construct SparkFiles objects") + + @classmethod + def get(cls, filename): + """ + Get the absolute path of a file added through C{addFile()}. 
+ """ + path = os.path.join(SparkFiles._root_directory, filename) + return os.path.abspath(path) diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index b2b9288089..e7bdb7682b 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -8,6 +8,7 @@ from base64 import standard_b64decode from pyspark.accumulators import _accumulatorRegistry from pyspark.broadcast import Broadcast, _broadcastRegistry from pyspark.cloudpickle import CloudPickler +from pyspark.files import SparkFiles from pyspark.serializers import write_with_length, read_with_length, write_int, \ read_long, read_int, dump_pickle, load_pickle, read_from_pickle_file @@ -23,6 +24,8 @@ def load_obj(): def main(): split_index = read_int(sys.stdin) + spark_files_dir = load_pickle(read_with_length(sys.stdin)) + SparkFiles._root_directory = spark_files_dir num_broadcast_variables = read_int(sys.stdin) for _ in range(num_broadcast_variables): bid = read_long(sys.stdin) diff --git a/python/run-tests b/python/run-tests index ce214e98a8..a3a9ff5dcb 100755 --- a/python/run-tests +++ b/python/run-tests @@ -8,6 +8,9 @@ FAILED=0 $FWDIR/pyspark pyspark/rdd.py FAILED=$(($?||$FAILED)) +$FWDIR/pyspark pyspark/context.py +FAILED=$(($?||$FAILED)) + $FWDIR/pyspark -m doctest pyspark/broadcast.py FAILED=$(($?||$FAILED)) From 7b9e96c99206c0679d9925e0161fde738a5c7c3a Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 21 Jan 2013 16:45:00 -0800 Subject: [PATCH 151/291] Add synchronization to Executor.updateDependencies() (SPARK-662) --- .../main/scala/spark/executor/Executor.scala | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/core/src/main/scala/spark/executor/Executor.scala b/core/src/main/scala/spark/executor/Executor.scala index 70629f6003..28d9d40d43 100644 --- a/core/src/main/scala/spark/executor/Executor.scala +++ b/core/src/main/scala/spark/executor/Executor.scala @@ -159,22 +159,24 @@ private[spark] class Executor extends Logging { * SparkContext. Also adds any new JARs we fetched to the class loader. 
*/ private def updateDependencies(newFiles: HashMap[String, Long], newJars: HashMap[String, Long]) { - // Fetch missing dependencies - for ((name, timestamp) <- newFiles if currentFiles.getOrElse(name, -1L) < timestamp) { - logInfo("Fetching " + name + " with timestamp " + timestamp) - Utils.fetchFile(name, new File(SparkFiles.getRootDirectory)) - currentFiles(name) = timestamp - } - for ((name, timestamp) <- newJars if currentJars.getOrElse(name, -1L) < timestamp) { - logInfo("Fetching " + name + " with timestamp " + timestamp) - Utils.fetchFile(name, new File(SparkFiles.getRootDirectory)) - currentJars(name) = timestamp - // Add it to our class loader - val localName = name.split("/").last - val url = new File(SparkFiles.getRootDirectory, localName).toURI.toURL - if (!urlClassLoader.getURLs.contains(url)) { - logInfo("Adding " + url + " to class loader") - urlClassLoader.addURL(url) + synchronized { + // Fetch missing dependencies + for ((name, timestamp) <- newFiles if currentFiles.getOrElse(name, -1L) < timestamp) { + logInfo("Fetching " + name + " with timestamp " + timestamp) + Utils.fetchFile(name, new File(SparkFiles.getRootDirectory)) + currentFiles(name) = timestamp + } + for ((name, timestamp) <- newJars if currentJars.getOrElse(name, -1L) < timestamp) { + logInfo("Fetching " + name + " with timestamp " + timestamp) + Utils.fetchFile(name, new File(SparkFiles.getRootDirectory)) + currentJars(name) = timestamp + // Add it to our class loader + val localName = name.split("/").last + val url = new File(SparkFiles.getRootDirectory, localName).toURI.toURL + if (!urlClassLoader.getURLs.contains(url)) { + logInfo("Adding " + url + " to class loader") + urlClassLoader.addURL(url) + } } } } From 2d8218b8717435a47d7cea399290b30bf5ef010b Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Mon, 21 Jan 2013 20:00:27 -0600 Subject: [PATCH 152/291] Remove unneeded/now-broken saveAsNewAPIHadoopFile overload. --- core/src/main/scala/spark/PairRDDFunctions.scala | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala index 656b820b8a..53b051f1c5 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/spark/PairRDDFunctions.scala @@ -485,18 +485,6 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( saveAsNewAPIHadoopFile(path, getKeyClass, getValueClass, fm.erasure.asInstanceOf[Class[F]]) } - /** - * Output the RDD to any Hadoop-supported file system, using a new Hadoop API `OutputFormat` - * (mapreduce.OutputFormat) object supporting the key and value types K and V in this RDD. - */ - def saveAsNewAPIHadoopFile( - path: String, - keyClass: Class[_], - valueClass: Class[_], - outputFormatClass: Class[_ <: NewOutputFormat[_, _]]) { - saveAsNewAPIHadoopFile(path, keyClass, valueClass, outputFormatClass) - } - /** * Output the RDD to any Hadoop-supported file system, using a new Hadoop API `OutputFormat` * (mapreduce.OutputFormat) object supporting the key and value types K and V in this RDD. From a8baeb93272b03a98e44c7bf5c541611aec4a64b Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Mon, 21 Jan 2013 21:30:24 -0600 Subject: [PATCH 153/291] Further simplify getOrElse call. 
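This one-line change (diff below) swaps an Option#getOrElse on the result of Map#get for a single Map#getOrElse call. A tiny sketch of the equivalence, with a made-up preference map for illustration:

object GetOrElseExample extends App {
  val locationPrefs = Map(0 -> Seq("host1"), 2 -> Seq("host3"))
  val before = locationPrefs.get(1) getOrElse Nil  // lookup, then Option#getOrElse
  val after  = locationPrefs.getOrElse(1, Nil)     // one call on the map itself
  assert(before == after)                          // both yield Nil for a missing key
  println(before == after)                         // prints: true
}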
--- core/src/main/scala/spark/ParallelCollection.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/ParallelCollection.scala b/core/src/main/scala/spark/ParallelCollection.scala index ad23e5bec8..10adcd53ec 100644 --- a/core/src/main/scala/spark/ParallelCollection.scala +++ b/core/src/main/scala/spark/ParallelCollection.scala @@ -44,7 +44,7 @@ private[spark] class ParallelCollection[T: ClassManifest]( s.asInstanceOf[ParallelCollectionSplit[T]].iterator override def getPreferredLocations(s: Split): Seq[String] = { - locationPrefs.get(s.index) getOrElse Nil + locationPrefs.getOrElse(s.index, Nil) } override def clearDependencies() { From c75ae3622eeed068c44b1f823ef4d87d01a720fd Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sun, 20 Jan 2013 15:12:54 -0800 Subject: [PATCH 154/291] Make AccumulatorParam an abstract base class. --- python/pyspark/accumulators.py | 29 ++++++++++++++++++++++++++--- python/pyspark/context.py | 15 +++++---------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py index 8011779ddc..5a9269f9bb 100644 --- a/python/pyspark/accumulators.py +++ b/python/pyspark/accumulators.py @@ -61,6 +61,7 @@ Traceback (most recent call last): Exception:... """ +from abc import ABCMeta, abstractmethod import struct import SocketServer import threading @@ -90,8 +91,7 @@ class Accumulator(object): While C{SparkContext} supports accumulators for primitive data types like C{int} and C{float}, users can also define accumulators for custom types by providing a custom - C{AccumulatorParam} object with a C{zero} and C{addInPlace} method. Refer to the doctest - of this module for an example. + L{AccumulatorParam} object. Refer to the doctest of this module for an example. """ def __init__(self, aid, value, accum_param): @@ -134,7 +134,30 @@ class Accumulator(object): return "Accumulator" % (self.aid, self._value) -class AddingAccumulatorParam(object): +class AccumulatorParam(object): + """ + Helper object that defines how to accumulate values of a given type. + """ + __metaclass__ = ABCMeta + + @abstractmethod + def zero(self, value): + """ + Provide a "zero value" for the type, compatible in dimensions with the + provided C{value} (e.g., a zero vector) + """ + return + + @abstractmethod + def addInPlace(self, value1, value2): + """ + Add two values of the accumulator's data type, returning a new value; + for efficiency, can also update C{value1} in place and return it. + """ + return + + +class AddingAccumulatorParam(AccumulatorParam): """ An AccumulatorParam that uses the + operators to add values. Designed for simple types such as integers, floats, and lists. Requires the zero value for the underlying type diff --git a/python/pyspark/context.py b/python/pyspark/context.py index dcbed37270..a17e7a4ad1 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -148,16 +148,11 @@ class SparkContext(object): def accumulator(self, value, accum_param=None): """ - Create an C{Accumulator} with the given initial value, using a given - AccumulatorParam helper object to define how to add values of the data - type if provided. Default AccumulatorParams are used for integers and - floating-point numbers if you do not provide one. For other types, the - AccumulatorParam must implement two methods: - - C{zero(value)}: provide a "zero value" for the type, compatible in - dimensions with the provided C{value} (e.g., a zero vector). 
- - C{addInPlace(val1, val2)}: add two values of the accumulator's data - type, returning a new value; for efficiency, can also update C{val1} - in place and return it. + Create an L{Accumulator} with the given initial value, using a given + L{AccumulatorParam} helper object to define how to add values of the + data type if provided. Default AccumulatorParams are used for integers + and floating-point numbers if you do not provide one. For other types, + a custom AccumulatorParam can be used. """ if accum_param == None: if isinstance(value, int): From 551a47a620c7dc207e3530e54d794a3c3aa8e45e Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 21 Jan 2013 23:31:00 -0800 Subject: [PATCH 155/291] Refactor daemon thread pool creation. --- .../scala/spark/DaemonThreadFactory.scala | 18 ---------- core/src/main/scala/spark/Utils.scala | 33 ++++--------------- .../spark/network/ConnectionManager.scala | 5 ++- .../scheduler/local/LocalScheduler.scala | 2 +- .../streaming/dstream/RawInputDStream.scala | 5 +-- 5 files changed, 13 insertions(+), 50 deletions(-) delete mode 100644 core/src/main/scala/spark/DaemonThreadFactory.scala diff --git a/core/src/main/scala/spark/DaemonThreadFactory.scala b/core/src/main/scala/spark/DaemonThreadFactory.scala deleted file mode 100644 index 56e59adeb7..0000000000 --- a/core/src/main/scala/spark/DaemonThreadFactory.scala +++ /dev/null @@ -1,18 +0,0 @@ -package spark - -import java.util.concurrent.ThreadFactory - -/** - * A ThreadFactory that creates daemon threads - */ -private object DaemonThreadFactory extends ThreadFactory { - override def newThread(r: Runnable): Thread = new DaemonThread(r) -} - -private class DaemonThread(r: Runnable = null) extends Thread { - override def run() { - if (r != null) { - r.run() - } - } -} \ No newline at end of file diff --git a/core/src/main/scala/spark/Utils.scala b/core/src/main/scala/spark/Utils.scala index 692a3f4050..9b8636f6c8 100644 --- a/core/src/main/scala/spark/Utils.scala +++ b/core/src/main/scala/spark/Utils.scala @@ -10,6 +10,7 @@ import scala.collection.mutable.ArrayBuffer import scala.collection.JavaConversions._ import scala.io.Source import com.google.common.io.Files +import com.google.common.util.concurrent.ThreadFactoryBuilder /** * Various utility methods used by Spark. @@ -287,29 +288,14 @@ private object Utils extends Logging { customHostname.getOrElse(InetAddress.getLocalHost.getHostName) } - /** - * Returns a standard ThreadFactory except all threads are daemons. - */ - private def newDaemonThreadFactory: ThreadFactory = { - new ThreadFactory { - def newThread(r: Runnable): Thread = { - var t = Executors.defaultThreadFactory.newThread (r) - t.setDaemon (true) - return t - } - } - } + private[spark] val daemonThreadFactory: ThreadFactory = + new ThreadFactoryBuilder().setDaemon(true).build() /** * Wrapper over newCachedThreadPool. */ - def newDaemonCachedThreadPool(): ThreadPoolExecutor = { - var threadPool = Executors.newCachedThreadPool.asInstanceOf[ThreadPoolExecutor] - - threadPool.setThreadFactory (newDaemonThreadFactory) - - return threadPool - } + def newDaemonCachedThreadPool(): ThreadPoolExecutor = + Executors.newCachedThreadPool(daemonThreadFactory).asInstanceOf[ThreadPoolExecutor] /** * Return the string to tell how long has passed in seconds. The passing parameter should be in @@ -322,13 +308,8 @@ private object Utils extends Logging { /** * Wrapper over newFixedThreadPool. 
*/ - def newDaemonFixedThreadPool(nThreads: Int): ThreadPoolExecutor = { - var threadPool = Executors.newFixedThreadPool(nThreads).asInstanceOf[ThreadPoolExecutor] - - threadPool.setThreadFactory(newDaemonThreadFactory) - - return threadPool - } + def newDaemonFixedThreadPool(nThreads: Int): ThreadPoolExecutor = + Executors.newFixedThreadPool(nThreads, daemonThreadFactory).asInstanceOf[ThreadPoolExecutor] /** * Delete a file or directory and its contents recursively. diff --git a/core/src/main/scala/spark/network/ConnectionManager.scala b/core/src/main/scala/spark/network/ConnectionManager.scala index 36c01ad629..2ecd14f536 100644 --- a/core/src/main/scala/spark/network/ConnectionManager.scala +++ b/core/src/main/scala/spark/network/ConnectionManager.scala @@ -52,9 +52,8 @@ private[spark] class ConnectionManager(port: Int) extends Logging { val keyInterestChangeRequests = new SynchronizedQueue[(SelectionKey, Int)] val sendMessageRequests = new Queue[(Message, SendingConnection)] - implicit val futureExecContext = ExecutionContext.fromExecutor( - Executors.newCachedThreadPool(DaemonThreadFactory)) - + implicit val futureExecContext = ExecutionContext.fromExecutor(Utils.newDaemonCachedThreadPool()) + var onReceiveCallback: (BufferMessage, ConnectionManagerId) => Option[Message]= null serverChannel.configureBlocking(false) diff --git a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala b/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala index dff550036d..87f8474ea0 100644 --- a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala +++ b/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala @@ -20,7 +20,7 @@ private[spark] class LocalScheduler(threads: Int, maxFailures: Int, sc: SparkCon with Logging { var attemptId = new AtomicInteger(0) - var threadPool = Executors.newFixedThreadPool(threads, DaemonThreadFactory) + var threadPool = Utils.newDaemonFixedThreadPool(threads) val env = SparkEnv.get var listener: TaskSchedulerListener = null diff --git a/streaming/src/main/scala/spark/streaming/dstream/RawInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/RawInputDStream.scala index 290fab1ce0..04e6b69b7b 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/RawInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/RawInputDStream.scala @@ -1,6 +1,6 @@ package spark.streaming.dstream -import spark.{DaemonThread, Logging} +import spark.Logging import spark.storage.StorageLevel import spark.streaming.StreamingContext @@ -48,7 +48,8 @@ class RawNetworkReceiver(host: String, port: Int, storageLevel: StorageLevel) val queue = new ArrayBlockingQueue[ByteBuffer](2) - blockPushingThread = new DaemonThread { + blockPushingThread = new Thread { + setDaemon(true) override def run() { var nextBlockNumber = 0 while (true) { From e353886a8ca6179f25b4176d7a62b5d04ce79276 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Tue, 22 Jan 2013 00:23:31 -0800 Subject: [PATCH 156/291] Use generation numbers for fetch failure tracking --- .../scala/spark/scheduler/DAGScheduler.scala | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index 59f2099e91..39a1e6d6c6 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -72,8 +72,12 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with val 
cacheTracker = env.cacheTracker val mapOutputTracker = env.mapOutputTracker - val deadHosts = new HashSet[String] // TODO: The code currently assumes these can't come back; - // that's not going to be a realistic assumption in general + // For tracking failed nodes, we use the MapOutputTracker's generation number, which is + // sent with every task. When we detect a node failing, we note the current generation number + // and failed host, increment it for new tasks, and use this to ignore stray ShuffleMapTask + // results. + // TODO: Garbage collect information about failure generations when new stages start. + val failedGeneration = new HashMap[String, Long] val waiting = new HashSet[Stage] // Stages we need to run whose parents aren't done val running = new HashSet[Stage] // Stages we are running right now @@ -429,7 +433,9 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with val status = event.result.asInstanceOf[MapStatus] val host = status.address.ip logInfo("ShuffleMapTask finished with host " + host) - if (!deadHosts.contains(host)) { // TODO: Make sure hostnames are consistent with Mesos + if (failedGeneration.contains(host) && smt.generation <= failedGeneration(host)) { + logInfo("Ignoring possibly bogus ShuffleMapTask completion from " + host) + } else { stage.addOutputLoc(smt.partition, status) } if (running.contains(stage) && pendingTasks(stage).isEmpty) { @@ -495,7 +501,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with lastFetchFailureTime = System.currentTimeMillis() // TODO: Use pluggable clock // TODO: mark the host as failed only if there were lots of fetch failures on it if (bmAddress != null) { - handleHostLost(bmAddress.ip) + handleHostLost(bmAddress.ip, Some(task.generation)) } case other => @@ -507,11 +513,15 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with /** * Responds to a host being lost. This is called inside the event loop so it assumes that it can * modify the scheduler's internal state. Use hostLost() to post a host lost event from outside. + * + * Optionally the generation during which the failure was caught can be passed to avoid allowing + * stray fetch failures from possibly retriggering the detection of a node as lost. */ - def handleHostLost(host: String) { - if (!deadHosts.contains(host)) { + def handleHostLost(host: String, maybeGeneration: Option[Long] = None) { + val currentGeneration = maybeGeneration.getOrElse(mapOutputTracker.getGeneration) + if (!failedGeneration.contains(host) || failedGeneration(host) < currentGeneration) { + failedGeneration(host) = currentGeneration logInfo("Host lost: " + host) - deadHosts += host env.blockManager.master.notifyADeadHost(host) // TODO: This will be really slow if we keep accumulating shuffle map stages for ((shuffleId, stage) <- shuffleToMapStage) { @@ -519,6 +529,9 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with val locs = stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray mapOutputTracker.registerMapOutputs(shuffleId, locs, true) } + if (shuffleToMapStage.isEmpty) { + mapOutputTracker.incrementGeneration() + } cacheTracker.cacheLost(host) updateCacheLocs() } From 364cdb679cf2b0d5e6ed7ab89628f15594d7947f Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Tue, 22 Jan 2013 00:43:31 -0800 Subject: [PATCH 157/291] Refactored DStreamCheckpointData. 
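The scheduler patch above replaces the one-shot deadHosts set with per-host failure generations so that stray ShuffleMapTask results from a lost host can be recognized and ignored. A minimal, stand-alone sketch of that check; the map and condition follow the diff, everything else is simplified for illustration and is not the DAGScheduler itself:

import scala.collection.mutable.HashMap

object GenerationCheckExample extends App {
  // host -> generation number recorded when the host was declared lost
  val failedGeneration = new HashMap[String, Long]

  // Accept a map output only if the task ran with a newer generation than the
  // host's last recorded failure (mirrors the condition added in the diff above).
  def acceptOutput(host: String, taskGeneration: Long): Boolean =
    !(failedGeneration.contains(host) && taskGeneration <= failedGeneration(host))

  failedGeneration("worker-1") = 5L
  println(acceptOutput("worker-1", 4L))  // false: stale result from before the failure
  println(acceptOutput("worker-1", 6L))  // true: launched after the failure was noted
  println(acceptOutput("worker-2", 1L))  // true: host has no recorded failure
}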
--- .../main/scala/spark/streaming/DStream.scala | 58 ++----------- .../streaming/DStreamCheckpointData.scala | 84 +++++++++++++++++++ .../streaming/dstream/KafkaInputDStream.scala | 9 -- .../spark/streaming/CheckpointSuite.scala | 12 +-- 4 files changed, 99 insertions(+), 64 deletions(-) create mode 100644 streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala diff --git a/streaming/src/main/scala/spark/streaming/DStream.scala b/streaming/src/main/scala/spark/streaming/DStream.scala index b11ef443dc..3c1861a840 100644 --- a/streaming/src/main/scala/spark/streaming/DStream.scala +++ b/streaming/src/main/scala/spark/streaming/DStream.scala @@ -12,7 +12,7 @@ import scala.collection.mutable.HashMap import java.io.{ObjectInputStream, IOException, ObjectOutputStream} -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.conf.Configuration /** @@ -75,7 +75,7 @@ abstract class DStream[T: ClassManifest] ( // Checkpoint details protected[streaming] val mustCheckpoint = false protected[streaming] var checkpointDuration: Duration = null - protected[streaming] var checkpointData = new DStreamCheckpointData(HashMap[Time, Any]()) + protected[streaming] val checkpointData = new DStreamCheckpointData(this) // Reference to whole DStream graph protected[streaming] var graph: DStreamGraph = null @@ -85,10 +85,10 @@ abstract class DStream[T: ClassManifest] ( // Duration for which the DStream requires its parent DStream to remember each RDD created protected[streaming] def parentRememberDuration = rememberDuration - /** Returns the StreamingContext associated with this DStream */ + /** Return the StreamingContext associated with this DStream */ def context() = ssc - /** Persists the RDDs of this DStream with the given storage level */ + /** Persist the RDDs of this DStream with the given storage level */ def persist(level: StorageLevel): DStream[T] = { if (this.isInitialized) { throw new UnsupportedOperationException( @@ -342,40 +342,10 @@ abstract class DStream[T: ClassManifest] ( */ protected[streaming] def updateCheckpointData(currentTime: Time) { logInfo("Updating checkpoint data for time " + currentTime) - - // Get the checkpointed RDDs from the generated RDDs - val newRdds = generatedRDDs.filter(_._2.getCheckpointFile.isDefined) - .map(x => (x._1, x._2.getCheckpointFile.get)) - - // Make a copy of the existing checkpoint data (checkpointed RDDs) - val oldRdds = checkpointData.rdds.clone() - - // If the new checkpoint data has checkpoints then replace existing with the new one - if (newRdds.size > 0) { - checkpointData.rdds.clear() - checkpointData.rdds ++= newRdds - } - - // Make parent DStreams update their checkpoint data + checkpointData.update() dependencies.foreach(_.updateCheckpointData(currentTime)) - - // TODO: remove this, this is just for debugging - newRdds.foreach { - case (time, data) => { logInfo("Added checkpointed RDD for time " + time + " to stream checkpoint") } - } - - if (newRdds.size > 0) { - (oldRdds -- newRdds.keySet).foreach { - case (time, data) => { - val path = new Path(data.toString) - val fs = path.getFileSystem(new Configuration()) - fs.delete(path, true) - logInfo("Deleted checkpoint file '" + path + "' for time " + time) - } - } - } - logInfo("Updated checkpoint data for time " + currentTime + ", " + checkpointData.rdds.size + " checkpoints, " - + "[" + checkpointData.rdds.mkString(",") + "]") + checkpointData.cleanup() + logDebug("Updated checkpoint data for time " + currentTime + ": " + checkpointData) } 
/** @@ -386,14 +356,8 @@ abstract class DStream[T: ClassManifest] ( */ protected[streaming] def restoreCheckpointData() { // Create RDDs from the checkpoint data - logInfo("Restoring checkpoint data from " + checkpointData.rdds.size + " checkpointed RDDs") - checkpointData.rdds.foreach { - case(time, data) => { - logInfo("Restoring checkpointed RDD for time " + time + " from file '" + data.toString + "'") - val rdd = ssc.sc.checkpointFile[T](data.toString) - generatedRDDs += ((time, rdd)) - } - } + logInfo("Restoring checkpoint data from " + checkpointData.checkpointFiles.size + " checkpointed RDDs") + checkpointData.restore() dependencies.foreach(_.restoreCheckpointData()) logInfo("Restored checkpoint data") } @@ -651,7 +615,3 @@ abstract class DStream[T: ClassManifest] ( ssc.registerOutputStream(this) } } - -private[streaming] -case class DStreamCheckpointData(rdds: HashMap[Time, Any]) - diff --git a/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala b/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala new file mode 100644 index 0000000000..abf903293f --- /dev/null +++ b/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala @@ -0,0 +1,84 @@ +package spark.streaming + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.conf.Configuration +import collection.mutable.HashMap +import spark.Logging + + + +private[streaming] +class DStreamCheckpointData[T: ClassManifest] (dstream: DStream[T]) + extends Serializable with Logging { + private[streaming] val checkpointFiles = new HashMap[Time, String]() + @transient private lazy val fileSystem = + new Path(dstream.context.checkpointDir).getFileSystem(new Configuration()) + @transient private var lastCheckpointFiles: HashMap[Time, String] = null + + /** + * Update the checkpoint data of the DStream. Default implementation records the checkpoint files to + * which the generate RDDs of the DStream has been saved. + */ + def update() { + + // Get the checkpointed RDDs from the generated RDDs + val newCheckpointFiles = dstream.generatedRDDs.filter(_._2.getCheckpointFile.isDefined) + .map(x => (x._1, x._2.getCheckpointFile.get)) + + // Make a copy of the existing checkpoint data (checkpointed RDDs) + lastCheckpointFiles = checkpointFiles.clone() + + // If the new checkpoint data has checkpoints then replace existing with the new one + if (newCheckpointFiles.size > 0) { + checkpointFiles.clear() + checkpointFiles ++= newCheckpointFiles + } + + // TODO: remove this, this is just for debugging + newCheckpointFiles.foreach { + case (time, data) => { logInfo("Added checkpointed RDD for time " + time + " to stream checkpoint") } + } + } + + /** + * Cleanup old checkpoint data. Default implementation, cleans up old checkpoint files. + */ + def cleanup() { + // If there is at least on checkpoint file in the current checkpoint files, + // then delete the old checkpoint files. + if (checkpointFiles.size > 0 && lastCheckpointFiles != null) { + (lastCheckpointFiles -- checkpointFiles.keySet).foreach { + case (time, file) => { + try { + val path = new Path(file) + fileSystem.delete(path, true) + logInfo("Deleted checkpoint file '" + file + "' for time " + time) + } catch { + case e: Exception => + logWarning("Error deleting old checkpoint file '" + file + "' for time " + time, e) + } + } + } + } + } + + /** + * Restore the checkpoint data. Default implementation restores the RDDs from their + * checkpoint files. 
+ */ + def restore() { + // Create RDDs from the checkpoint data + checkpointFiles.foreach { + case(time, file) => { + logInfo("Restoring checkpointed RDD for time " + time + " from file '" + file + "'") + dstream.generatedRDDs += ((time, dstream.context.sc.checkpointFile[T](file))) + } + } + } + + override def toString() = { + "[\n" + checkpointFiles.size + "\n" + checkpointFiles.mkString("\n") + "\n]" + } +} + diff --git a/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala index 2b4740bdf7..760d9b5cf3 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala @@ -19,15 +19,6 @@ import scala.collection.JavaConversions._ // Key for a specific Kafka Partition: (broker, topic, group, part) case class KafkaPartitionKey(brokerId: Int, topic: String, groupId: String, partId: Int) -// NOT USED - Originally intended for fault-tolerance -// Metadata for a Kafka Stream that it sent to the Master -private[streaming] -case class KafkaInputDStreamMetadata(timestamp: Long, data: Map[KafkaPartitionKey, Long]) -// NOT USED - Originally intended for fault-tolerance -// Checkpoint data specific to a KafkaInputDstream -private[streaming] -case class KafkaDStreamCheckpointData(kafkaRdds: HashMap[Time, Any], - savedOffsets: Map[KafkaPartitionKey, Long]) extends DStreamCheckpointData(kafkaRdds) /** * Input stream that pulls messages from a Kafka Broker. diff --git a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala index d2f32c189b..58da4ee539 100644 --- a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala +++ b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala @@ -63,9 +63,9 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { // then check whether some RDD has been checkpointed or not ssc.start() runStreamsWithRealDelay(ssc, firstNumBatches) - logInfo("Checkpoint data of state stream = \n[" + stateStream.checkpointData.rdds.mkString(",\n") + "]") - assert(!stateStream.checkpointData.rdds.isEmpty, "No checkpointed RDDs in state stream before first failure") - stateStream.checkpointData.rdds.foreach { + logInfo("Checkpoint data of state stream = \n" + stateStream.checkpointData) + assert(!stateStream.checkpointData.checkpointFiles.isEmpty, "No checkpointed RDDs in state stream before first failure") + stateStream.checkpointData.checkpointFiles.foreach { case (time, data) => { val file = new File(data.toString) assert(file.exists(), "Checkpoint file '" + file +"' for time " + time + " for state stream before first failure does not exist") @@ -74,7 +74,7 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { // Run till a further time such that previous checkpoint files in the stream would be deleted // and check whether the earlier checkpoint files are deleted - val checkpointFiles = stateStream.checkpointData.rdds.map(x => new File(x._2.toString)) + val checkpointFiles = stateStream.checkpointData.checkpointFiles.map(x => new File(x._2)) runStreamsWithRealDelay(ssc, secondNumBatches) checkpointFiles.foreach(file => assert(!file.exists, "Checkpoint file '" + file + "' was not deleted")) ssc.stop() @@ -91,8 +91,8 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { // is present in the checkpoint data or not ssc.start() runStreamsWithRealDelay(ssc, 1) - 
assert(!stateStream.checkpointData.rdds.isEmpty, "No checkpointed RDDs in state stream before second failure") - stateStream.checkpointData.rdds.foreach { + assert(!stateStream.checkpointData.checkpointFiles.isEmpty, "No checkpointed RDDs in state stream before second failure") + stateStream.checkpointData.checkpointFiles.foreach { case (time, data) => { val file = new File(data.toString) assert(file.exists(), From 7e9ee2e8335f085062d3fdeecd0b49ec63e92117 Mon Sep 17 00:00:00 2001 From: Leemoonsoo Date: Tue, 22 Jan 2013 23:08:34 +0900 Subject: [PATCH 158/291] Fix for hanging spark.HttpFileServer with kind of virtual network --- core/src/main/scala/spark/HttpServer.scala | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/HttpServer.scala b/core/src/main/scala/spark/HttpServer.scala index 0196595ba1..4e0507c080 100644 --- a/core/src/main/scala/spark/HttpServer.scala +++ b/core/src/main/scala/spark/HttpServer.scala @@ -4,6 +4,7 @@ import java.io.File import java.net.InetAddress import org.eclipse.jetty.server.Server +import org.eclipse.jetty.server.bio.SocketConnector import org.eclipse.jetty.server.handler.DefaultHandler import org.eclipse.jetty.server.handler.HandlerList import org.eclipse.jetty.server.handler.ResourceHandler @@ -27,7 +28,13 @@ private[spark] class HttpServer(resourceBase: File) extends Logging { if (server != null) { throw new ServerStateException("Server is already started") } else { - server = new Server(0) + server = new Server() + val connector = new SocketConnector + connector.setMaxIdleTime(60*1000) + connector.setSoLingerTime(-1) + connector.setPort(0) + server.addConnector(connector) + val threadPool = new QueuedThreadPool threadPool.setDaemon(true) server.setThreadPool(threadPool) From 588b24197a85c4b46a38595007293abef9a41f2c Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 22 Jan 2013 10:19:30 -0600 Subject: [PATCH 159/291] Use default arguments instead of constructor overloads. --- core/src/main/scala/spark/SparkContext.scala | 22 +++----------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 8b6f4b3b7d..495d1b6c78 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -58,27 +58,11 @@ import spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend class SparkContext( val master: String, val jobName: String, - val sparkHome: String, - val jars: Seq[String], - environment: Map[String, String]) + val sparkHome: String = null, + val jars: Seq[String] = Nil, + environment: Map[String, String] = Map()) extends Logging { - /** - * @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]). - * @param jobName A name for your job, to display on the cluster web UI - * @param sparkHome Location where Spark is installed on cluster nodes. - * @param jars Collection of JARs to send to the cluster. These can be paths on the local file - * system or HDFS, HTTP, HTTPS, or FTP URLs. - */ - def this(master: String, jobName: String, sparkHome: String, jars: Seq[String]) = - this(master, jobName, sparkHome, jars, Map()) - - /** - * @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]). 
- * @param jobName A name for your job, to display on the cluster web UI - */ - def this(master: String, jobName: String) = this(master, jobName, null, Nil, Map()) - // Ensure logging is initialized before we spawn any threads initLogging() From 50e2b23927956c14db40093d31bc80892764006a Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Tue, 22 Jan 2013 09:27:33 -0800 Subject: [PATCH 160/291] Fix up some problems from the merge --- .../scala/spark/storage/BlockManagerMasterActor.scala | 11 +++++++++++ .../scala/spark/storage/BlockManagerMessages.scala | 3 +++ core/src/main/scala/spark/storage/StorageUtils.scala | 8 ++++---- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala b/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala index f4d026da33..c945c34c71 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala @@ -68,6 +68,9 @@ class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { case GetMemoryStatus => getMemoryStatus + case GetStorageStatus => + getStorageStatus + case RemoveBlock(blockId) => removeBlock(blockId) @@ -177,6 +180,14 @@ class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { sender ! res } + private def getStorageStatus() { + val res = blockManagerInfo.map { case(blockManagerId, info) => + import collection.JavaConverters._ + StorageStatus(blockManagerId, info.maxMem, info.blocks.asScala.toMap) + } + sender ! res + } + private def register(blockManagerId: BlockManagerId, maxMemSize: Long, slaveActor: ActorRef) { val startTimeMs = System.currentTimeMillis() val tmp = " " + blockManagerId + " " diff --git a/core/src/main/scala/spark/storage/BlockManagerMessages.scala b/core/src/main/scala/spark/storage/BlockManagerMessages.scala index d73a9b790f..3a381fd385 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMessages.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMessages.scala @@ -100,3 +100,6 @@ case object GetMemoryStatus extends ToBlockManagerMaster private[spark] case object ExpireDeadHosts extends ToBlockManagerMaster + +private[spark] +case object GetStorageStatus extends ToBlockManagerMaster \ No newline at end of file diff --git a/core/src/main/scala/spark/storage/StorageUtils.scala b/core/src/main/scala/spark/storage/StorageUtils.scala index ebc7390ee5..63ad5c125b 100644 --- a/core/src/main/scala/spark/storage/StorageUtils.scala +++ b/core/src/main/scala/spark/storage/StorageUtils.scala @@ -1,6 +1,7 @@ package spark.storage import spark.SparkContext +import BlockManagerMasterActor.BlockStatus private[spark] case class StorageStatus(blockManagerId: BlockManagerId, maxMem: Long, @@ -20,8 +21,8 @@ case class StorageStatus(blockManagerId: BlockManagerId, maxMem: Long, } -case class RDDInfo(id: Int, name: String, storageLevel: StorageLevel, - numPartitions: Int, memSize: Long, diskSize: Long, locations: Array[BlockManagerId]) +case class RDDInfo(id: Int, name: String, storageLevel: StorageLevel, + numPartitions: Int, memSize: Long, diskSize: Long) /* Helper methods for storage-related objects */ @@ -58,8 +59,7 @@ object StorageUtils { val rddName = Option(sc.persistentRdds.get(rddId).name).getOrElse(rddKey) val rddStorageLevel = sc.persistentRdds.get(rddId).getStorageLevel - RDDInfo(rddId, rddName, rddStorageLevel, rddBlocks.length, memSize, diskSize, - rddBlocks.map(_.blockManagerId)) + RDDInfo(rddId, rddName, rddStorageLevel, 
rddBlocks.length, memSize, diskSize) }.toArray } From 27b3f3f0a980f86bac14a14516b5d52a32aa8cbb Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 22 Jan 2013 15:30:42 -0600 Subject: [PATCH 161/291] Handle slaveLost before slaveIdToHost knows about it. --- .../scheduler/cluster/ClusterScheduler.scala | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala index 20f6e65020..a639b72795 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala +++ b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala @@ -252,19 +252,24 @@ private[spark] class ClusterScheduler(val sc: SparkContext) def slaveLost(slaveId: String, reason: ExecutorLossReason) { var failedHost: Option[String] = None synchronized { - val host = slaveIdToHost(slaveId) - if (hostsAlive.contains(host)) { - logError("Lost an executor on " + host + ": " + reason) - slaveIdsWithExecutors -= slaveId - hostsAlive -= host - activeTaskSetsQueue.foreach(_.hostLost(host)) - failedHost = Some(host) - } else { - // We may get multiple slaveLost() calls with different loss reasons. For example, one - // may be triggered by a dropped connection from the slave while another may be a report - // of executor termination from Mesos. We produce log messages for both so we eventually - // report the termination reason. - logError("Lost an executor on " + host + " (already removed): " + reason) + slaveIdToHost.get(slaveId) match { + case Some(host) => + if (hostsAlive.contains(host)) { + logError("Lost an executor on " + host + ": " + reason) + slaveIdsWithExecutors -= slaveId + hostsAlive -= host + activeTaskSetsQueue.foreach(_.hostLost(host)) + failedHost = Some(host) + } else { + // We may get multiple slaveLost() calls with different loss reasons. For example, one + // may be triggered by a dropped connection from the slave while another may be a report + // of executor termination from Mesos. We produce log messages for both so we eventually + // report the termination reason. + logError("Lost an executor on " + host + " (already removed): " + reason) + } + case None => + // We were told about a slave being lost before we could even allocate work to it + logError("Lost slave " + slaveId + " (no work assigned yet)") } } if (failedHost != None) { From 6f2194f7576eb188c23f18125f5101ae0b4e9e4d Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 22 Jan 2013 15:38:58 -0600 Subject: [PATCH 162/291] Call removeJob instead of killing the cluster. 
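Previously the Master built a SparkException and threw it once a job exceeded JobState.MAX_NUM_RETRY (the commented-out System.exit(1) in the diff below shows the original intent), which could take the whole standalone cluster down for one bad job; after this change only the offending job is removed and the Master keeps scheduling the others. A rough, self-contained sketch of that pattern, with hypothetical names rather than the actual Master code:

    case class Job(id: String, var retries: Int = 0)

    class Scheduler(maxRetries: Int) {
      private var jobs = Map.empty[String, Job]

      def submit(job: Job): Unit = jobs += (job.id -> job)

      // On repeated failure, drop the single job instead of tearing the service down.
      def onExecutorFailed(job: Job): Unit = {
        job.retries += 1
        if (job.retries <= maxRetries) reschedule(job)   // give it another chance
        else jobs -= job.id                              // remove only this job
      }

      private def reschedule(job: Job): Unit = ()        // placeholder for relaunch logic
    }
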
--- core/src/main/scala/spark/deploy/master/Master.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/deploy/master/Master.scala b/core/src/main/scala/spark/deploy/master/Master.scala index 2c2cd0231b..d1a65204b8 100644 --- a/core/src/main/scala/spark/deploy/master/Master.scala +++ b/core/src/main/scala/spark/deploy/master/Master.scala @@ -103,8 +103,7 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor val e = new SparkException("Job %s wth ID %s failed %d times.".format( jobInfo.desc.name, jobInfo.id, jobInfo.retryCount)) logError(e.getMessage, e) - throw e - //System.exit(1) + removeJob(jobInfo) } } } From 250fe89679bb59ef0d31f74985f72556dcfe2d06 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 22 Jan 2013 16:29:05 -0600 Subject: [PATCH 163/291] Handle Master telling the Worker to kill an already-dead executor. --- core/src/main/scala/spark/deploy/worker/Worker.scala | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/spark/deploy/worker/Worker.scala b/core/src/main/scala/spark/deploy/worker/Worker.scala index 19bf2be118..d040b86908 100644 --- a/core/src/main/scala/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/spark/deploy/worker/Worker.scala @@ -143,9 +143,13 @@ private[spark] class Worker( case KillExecutor(jobId, execId) => val fullId = jobId + "/" + execId - val executor = executors(fullId) - logInfo("Asked to kill executor " + fullId) - executor.kill() + executors.get(fullId) match { + case Some(executor) => + logInfo("Asked to kill executor " + fullId) + executor.kill() + case None => + logInfo("Asked to kill non-existent existent " + fullId) + } case Terminated(_) | RemoteClientDisconnected(_, _) | RemoteClientShutdown(_, _) => masterDisconnected() From 2437f6741b9c5b0a778d55d324aabdc4642889e5 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 22 Jan 2013 18:01:03 -0600 Subject: [PATCH 164/291] Restore SPARK_MEM in executorEnvs. --- core/src/main/scala/spark/SparkContext.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index a5a1b75944..402355bd52 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -111,8 +111,9 @@ class SparkContext( // Environment variables to pass to our executors private[spark] val executorEnvs = HashMap[String, String]() - // Note: SPARK_MEM isn't included because it's set directly in ExecutorRunner - for (key <- Seq("SPARK_CLASSPATH", "SPARK_LIBRARY_PATH", "SPARK_JAVA_OPTS", "SPARK_TESTING")) { + // Note: SPARK_MEM is included for Mesos, but overwritten for standalone mode in ExecutorRunner + for (key <- Seq("SPARK_MEM", "SPARK_CLASSPATH", "SPARK_LIBRARY_PATH", "SPARK_JAVA_OPTS", + "SPARK_TESTING")) { val value = System.getenv(key) if (value != null) { executorEnvs(key) = value From fdec42385a1a8f10f9dd803525cb3c132a25ba53 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 22 Jan 2013 18:01:12 -0600 Subject: [PATCH 165/291] Fix SPARK_MEM in ExecutorRunner. 
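ExecutorRunner exported SPARK_MEM as a bare number of megabytes (e.g. "512"); the value is presumably consumed further down as a JVM heap setting, where a bare number is read as bytes rather than megabytes, so the suffix matters. A minimal illustration with assumed values (`memory` stands for the executor's memory in MB):

    val memory = 512
    val before = memory.toString          // "512"  -> read as bytes if fed to -Xmx
    val after  = memory.toString + "m"    // "512m" -> 512 megabytes, the intended amount
    println("SPARK_MEM was " + before + ", is now " + after)
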
--- core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala index 2f2ea617ff..e910416235 100644 --- a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala @@ -118,7 +118,7 @@ private[spark] class ExecutorRunner( for ((key, value) <- jobDesc.command.environment) { env.put(key, value) } - env.put("SPARK_MEM", memory.toString) + env.put("SPARK_MEM", memory.toString + "m") // In case we are running this from within the Spark Shell, avoid creating a "scala" // parent process for the executor command env.put("SPARK_LAUNCH_WITH_SCALA", "0") From 8c51322cd05f2ae97a08c3af314c7608fcf71b57 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 22 Jan 2013 18:09:10 -0600 Subject: [PATCH 166/291] Don't bother creating an exception. --- core/src/main/scala/spark/deploy/master/Master.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/deploy/master/Master.scala b/core/src/main/scala/spark/deploy/master/Master.scala index d1a65204b8..361e5ac627 100644 --- a/core/src/main/scala/spark/deploy/master/Master.scala +++ b/core/src/main/scala/spark/deploy/master/Master.scala @@ -100,9 +100,8 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor if (jobInfo.incrementRetryCount <= JobState.MAX_NUM_RETRY) { schedule() } else { - val e = new SparkException("Job %s wth ID %s failed %d times.".format( + logError("Job %s wth ID %s failed %d times, removing it".format( jobInfo.desc.name, jobInfo.id, jobInfo.retryCount)) - logError(e.getMessage, e) removeJob(jobInfo) } } From 98d0b7747d7539db009a9bbc261f899955871524 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 22 Jan 2013 18:11:51 -0600 Subject: [PATCH 167/291] Fix Worker logInfo about unknown executor. 
--- core/src/main/scala/spark/deploy/worker/Worker.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/deploy/worker/Worker.scala b/core/src/main/scala/spark/deploy/worker/Worker.scala index d040b86908..5a83a42daf 100644 --- a/core/src/main/scala/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/spark/deploy/worker/Worker.scala @@ -148,7 +148,7 @@ private[spark] class Worker( logInfo("Asked to kill executor " + fullId) executor.kill() case None => - logInfo("Asked to kill non-existent existent " + fullId) + logInfo("Asked to kill unknown executor " + fullId) } case Terminated(_) | RemoteClientDisconnected(_, _) | RemoteClientShutdown(_, _) => From 35168d9c89904f0dc0bb470c1799f5ca3b04221f Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Tue, 22 Jan 2013 17:54:11 -0800 Subject: [PATCH 168/291] Fix sys.path bug in PySpark SparkContext.addPyFile --- python/pyspark/context.py | 2 -- python/pyspark/tests.py | 38 ++++++++++++++++++++++++++---- python/pyspark/worker.py | 1 + python/test_support/userlibrary.py | 7 ++++++ 4 files changed, 41 insertions(+), 7 deletions(-) create mode 100755 python/test_support/userlibrary.py diff --git a/python/pyspark/context.py b/python/pyspark/context.py index ec0cc7c2f9..b8d7dc05af 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -215,8 +215,6 @@ class SparkContext(object): """ self.addFile(path) filename = path.split("/")[-1] - os.environ["PYTHONPATH"] = \ - "%s:%s" % (filename, os.environ["PYTHONPATH"]) def setCheckpointDir(self, dirName, useExisting=False): """ diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index b0a403b580..4d70ee4f12 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -9,21 +9,32 @@ import time import unittest from pyspark.context import SparkContext +from pyspark.java_gateway import SPARK_HOME -class TestCheckpoint(unittest.TestCase): +class PySparkTestCase(unittest.TestCase): def setUp(self): - self.sc = SparkContext('local[4]', 'TestPartitioning', batchSize=2) - self.checkpointDir = NamedTemporaryFile(delete=False) - os.unlink(self.checkpointDir.name) - self.sc.setCheckpointDir(self.checkpointDir.name) + class_name = self.__class__.__name__ + self.sc = SparkContext('local[4]', class_name , batchSize=2) def tearDown(self): self.sc.stop() # To avoid Akka rebinding to the same port, since it doesn't unbind # immediately on shutdown self.sc.jvm.System.clearProperty("spark.master.port") + + +class TestCheckpoint(PySparkTestCase): + + def setUp(self): + PySparkTestCase.setUp(self) + self.checkpointDir = NamedTemporaryFile(delete=False) + os.unlink(self.checkpointDir.name) + self.sc.setCheckpointDir(self.checkpointDir.name) + + def tearDown(self): + PySparkTestCase.tearDown(self) shutil.rmtree(self.checkpointDir.name) def test_basic_checkpointing(self): @@ -57,5 +68,22 @@ class TestCheckpoint(unittest.TestCase): self.assertEquals([1, 2, 3, 4], recovered.collect()) +class TestAddFile(PySparkTestCase): + + def test_add_py_file(self): + # To ensure that we're actually testing addPyFile's effects, check that + # this job fails due to `userlibrary` not being on the Python path: + def func(x): + from userlibrary import UserClass + return UserClass().hello() + self.assertRaises(Exception, + self.sc.parallelize(range(2)).map(func).first) + # Add the file, so the job should now succeed: + path = os.path.join(SPARK_HOME, "python/test_support/userlibrary.py") + self.sc.addPyFile(path) + res = self.sc.parallelize(range(2)).map(func).first() + 
self.assertEqual("Hello World!", res) + + if __name__ == "__main__": unittest.main() diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index e7bdb7682b..4bf643da66 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -26,6 +26,7 @@ def main(): split_index = read_int(sys.stdin) spark_files_dir = load_pickle(read_with_length(sys.stdin)) SparkFiles._root_directory = spark_files_dir + sys.path.append(spark_files_dir) num_broadcast_variables = read_int(sys.stdin) for _ in range(num_broadcast_variables): bid = read_long(sys.stdin) diff --git a/python/test_support/userlibrary.py b/python/test_support/userlibrary.py new file mode 100755 index 0000000000..5bb6f5009f --- /dev/null +++ b/python/test_support/userlibrary.py @@ -0,0 +1,7 @@ +""" +Used to test shipping of code depenencies with SparkContext.addPyFile(). +""" + +class UserClass(object): + def hello(self): + return "Hello World!" From fad2b82fc8fb49f2171af10cf7e408d8b8dd7349 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Tue, 22 Jan 2013 18:10:00 -0800 Subject: [PATCH 169/291] Added support for saving input files of FileInputDStream to graph checkpoints. Modified 'file input stream with checkpoint' testcase to test recovery of pre-master-failure input files. --- .../main/scala/spark/streaming/DStream.scala | 29 +++--- .../streaming/DStreamCheckpointData.scala | 27 ++++-- .../scala/spark/streaming/DStreamGraph.scala | 2 +- .../spark/streaming/StreamingContext.scala | 7 +- .../streaming/dstream/FileInputDStream.scala | 96 +++++++++++++++---- .../spark/streaming/InputStreamsSuite.scala | 64 +++++++++---- 6 files changed, 159 insertions(+), 66 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/DStream.scala b/streaming/src/main/scala/spark/streaming/DStream.scala index 3c1861a840..07ecb018ee 100644 --- a/streaming/src/main/scala/spark/streaming/DStream.scala +++ b/streaming/src/main/scala/spark/streaming/DStream.scala @@ -86,7 +86,7 @@ abstract class DStream[T: ClassManifest] ( protected[streaming] def parentRememberDuration = rememberDuration /** Return the StreamingContext associated with this DStream */ - def context() = ssc + def context = ssc /** Persist the RDDs of this DStream with the given storage level */ def persist(level: StorageLevel): DStream[T] = { @@ -159,7 +159,7 @@ abstract class DStream[T: ClassManifest] ( ) assert( - checkpointDuration == null || ssc.sc.checkpointDir.isDefined, + checkpointDuration == null || context.sparkContext.checkpointDir.isDefined, "The checkpoint directory has not been set. Please use StreamingContext.checkpoint()" + " or SparkContext.checkpoint() to set the checkpoint directory." ) @@ -298,8 +298,8 @@ abstract class DStream[T: ClassManifest] ( getOrCompute(time) match { case Some(rdd) => { val jobFunc = () => { - val emptyFunc = { (iterator: Iterator[T]) => {} } - ssc.sc.runJob(rdd, emptyFunc) + val emptyFunc = { (iterator: Iterator[T]) => {} } + context.sparkContext.runJob(rdd, emptyFunc) } Some(new Job(time, jobFunc)) } @@ -310,10 +310,9 @@ abstract class DStream[T: ClassManifest] ( /** * Dereference RDDs that are older than rememberDuration. 
*/ - protected[streaming] def forgetOldRDDs(time: Time) { - val keys = generatedRDDs.keys + protected[streaming] def forgetOldMetadata(time: Time) { var numForgotten = 0 - keys.foreach(t => { + generatedRDDs.keys.foreach(t => { if (t <= (time - rememberDuration)) { generatedRDDs.remove(t) numForgotten += 1 @@ -321,7 +320,7 @@ abstract class DStream[T: ClassManifest] ( } }) logInfo("Forgot " + numForgotten + " RDDs from " + this) - dependencies.foreach(_.forgetOldRDDs(time)) + dependencies.foreach(_.forgetOldMetadata(time)) } /* Adds metadata to the Stream while it is running. @@ -356,7 +355,7 @@ abstract class DStream[T: ClassManifest] ( */ protected[streaming] def restoreCheckpointData() { // Create RDDs from the checkpoint data - logInfo("Restoring checkpoint data from " + checkpointData.checkpointFiles.size + " checkpointed RDDs") + logInfo("Restoring checkpoint data") checkpointData.restore() dependencies.foreach(_.restoreCheckpointData()) logInfo("Restored checkpoint data") @@ -397,7 +396,7 @@ abstract class DStream[T: ClassManifest] ( /** Return a new DStream by applying a function to all elements of this DStream. */ def map[U: ClassManifest](mapFunc: T => U): DStream[U] = { - new MappedDStream(this, ssc.sc.clean(mapFunc)) + new MappedDStream(this, context.sparkContext.clean(mapFunc)) } /** @@ -405,7 +404,7 @@ abstract class DStream[T: ClassManifest] ( * and then flattening the results */ def flatMap[U: ClassManifest](flatMapFunc: T => Traversable[U]): DStream[U] = { - new FlatMappedDStream(this, ssc.sc.clean(flatMapFunc)) + new FlatMappedDStream(this, context.sparkContext.clean(flatMapFunc)) } /** Return a new DStream containing only the elements that satisfy a predicate. */ @@ -427,7 +426,7 @@ abstract class DStream[T: ClassManifest] ( mapPartFunc: Iterator[T] => Iterator[U], preservePartitioning: Boolean = false ): DStream[U] = { - new MapPartitionedDStream(this, ssc.sc.clean(mapPartFunc), preservePartitioning) + new MapPartitionedDStream(this, context.sparkContext.clean(mapPartFunc), preservePartitioning) } /** @@ -456,7 +455,7 @@ abstract class DStream[T: ClassManifest] ( * this DStream will be registered as an output stream and therefore materialized. */ def foreach(foreachFunc: (RDD[T], Time) => Unit) { - val newStream = new ForEachDStream(this, ssc.sc.clean(foreachFunc)) + val newStream = new ForEachDStream(this, context.sparkContext.clean(foreachFunc)) ssc.registerOutputStream(newStream) newStream } @@ -474,7 +473,7 @@ abstract class DStream[T: ClassManifest] ( * on each RDD of this DStream. 
*/ def transform[U: ClassManifest](transformFunc: (RDD[T], Time) => RDD[U]): DStream[U] = { - new TransformedDStream(this, ssc.sc.clean(transformFunc)) + new TransformedDStream(this, context.sparkContext.clean(transformFunc)) } /** @@ -491,7 +490,7 @@ abstract class DStream[T: ClassManifest] ( if (first11.size > 10) println("...") println() } - val newStream = new ForEachDStream(this, ssc.sc.clean(foreachFunc)) + val newStream = new ForEachDStream(this, context.sparkContext.clean(foreachFunc)) ssc.registerOutputStream(newStream) } diff --git a/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala b/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala index abf903293f..a375980b84 100644 --- a/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala +++ b/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala @@ -11,14 +11,17 @@ import spark.Logging private[streaming] class DStreamCheckpointData[T: ClassManifest] (dstream: DStream[T]) extends Serializable with Logging { - private[streaming] val checkpointFiles = new HashMap[Time, String]() - @transient private lazy val fileSystem = - new Path(dstream.context.checkpointDir).getFileSystem(new Configuration()) + protected val data = new HashMap[Time, AnyRef]() + + @transient private var fileSystem : FileSystem = null @transient private var lastCheckpointFiles: HashMap[Time, String] = null + protected[streaming] def checkpointFiles = data.asInstanceOf[HashMap[Time, String]] + /** - * Update the checkpoint data of the DStream. Default implementation records the checkpoint files to - * which the generate RDDs of the DStream has been saved. + * Updates the checkpoint data of the DStream. This gets called every time + * the graph checkpoint is initiated. Default implementation records the + * checkpoint files to which the generate RDDs of the DStream has been saved. */ def update() { @@ -42,7 +45,9 @@ class DStreamCheckpointData[T: ClassManifest] (dstream: DStream[T]) } /** - * Cleanup old checkpoint data. Default implementation, cleans up old checkpoint files. + * Cleanup old checkpoint data. This gets called every time the graph + * checkpoint is initiated, but after `update` is called. Default + * implementation, cleans up old checkpoint files. */ def cleanup() { // If there is at least on checkpoint file in the current checkpoint files, @@ -52,6 +57,9 @@ class DStreamCheckpointData[T: ClassManifest] (dstream: DStream[T]) case (time, file) => { try { val path = new Path(file) + if (fileSystem == null) { + fileSystem = path.getFileSystem(new Configuration()) + } fileSystem.delete(path, true) logInfo("Deleted checkpoint file '" + file + "' for time " + time) } catch { @@ -64,15 +72,16 @@ class DStreamCheckpointData[T: ClassManifest] (dstream: DStream[T]) } /** - * Restore the checkpoint data. Default implementation restores the RDDs from their - * checkpoint files. + * Restore the checkpoint data. This gets called once when the DStream graph + * (along with its DStreams) are being restored from a graph checkpoint file. + * Default implementation restores the RDDs from their checkpoint files. 
*/ def restore() { // Create RDDs from the checkpoint data checkpointFiles.foreach { case(time, file) => { logInfo("Restoring checkpointed RDD for time " + time + " from file '" + file + "'") - dstream.generatedRDDs += ((time, dstream.context.sc.checkpointFile[T](file))) + dstream.generatedRDDs += ((time, dstream.context.sparkContext.checkpointFile[T](file))) } } } diff --git a/streaming/src/main/scala/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/spark/streaming/DStreamGraph.scala index bc4a40d7bc..d5a5496839 100644 --- a/streaming/src/main/scala/spark/streaming/DStreamGraph.scala +++ b/streaming/src/main/scala/spark/streaming/DStreamGraph.scala @@ -87,7 +87,7 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { private[streaming] def forgetOldRDDs(time: Time) { this.synchronized { - outputStreams.foreach(_.forgetOldRDDs(time)) + outputStreams.foreach(_.forgetOldMetadata(time)) } } diff --git a/streaming/src/main/scala/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/spark/streaming/StreamingContext.scala index 14500bdcb1..2cf00e3baa 100644 --- a/streaming/src/main/scala/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/spark/streaming/StreamingContext.scala @@ -61,7 +61,7 @@ class StreamingContext private ( protected[streaming] val isCheckpointPresent = (cp_ != null) - val sc: SparkContext = { + protected[streaming] val sc: SparkContext = { if (isCheckpointPresent) { new SparkContext(cp_.master, cp_.framework, cp_.sparkHome, cp_.jars) } else { @@ -100,6 +100,11 @@ class StreamingContext private ( protected[streaming] var receiverJobThread: Thread = null protected[streaming] var scheduler: Scheduler = null + /** + * Returns the associated Spark context + */ + def sparkContext = sc + /** * Sets each DStreams in this context to remember RDDs it generated in the last given duration. 
* DStreams remember RDDs only for a limited duration of time and releases them for garbage diff --git a/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala index 1e6ad84b44..c6ffb252ce 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala @@ -2,13 +2,14 @@ package spark.streaming.dstream import spark.RDD import spark.rdd.UnionRDD -import spark.streaming.{StreamingContext, Time} +import spark.streaming.{DStreamCheckpointData, StreamingContext, Time} import org.apache.hadoop.fs.{FileSystem, Path, PathFilter} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} -import scala.collection.mutable.HashSet +import scala.collection.mutable.{HashSet, HashMap} +import java.io.{ObjectInputStream, IOException} private[streaming] class FileInputDStream[K: ClassManifest, V: ClassManifest, F <: NewInputFormat[K,V] : ClassManifest]( @@ -18,21 +19,14 @@ class FileInputDStream[K: ClassManifest, V: ClassManifest, F <: NewInputFormat[K newFilesOnly: Boolean = true) extends InputDStream[(K, V)](ssc_) { + protected[streaming] override val checkpointData = new FileInputDStreamCheckpointData + + private val lastModTimeFiles = new HashSet[String]() + private var lastModTime = 0L + @transient private var path_ : Path = null @transient private var fs_ : FileSystem = null - - var lastModTime = 0L - val lastModTimeFiles = new HashSet[String]() - - def path(): Path = { - if (path_ == null) path_ = new Path(directory) - path_ - } - - def fs(): FileSystem = { - if (fs_ == null) fs_ = path.getFileSystem(new Configuration()) - fs_ - } + @transient private var files = new HashMap[Time, Array[String]] override def start() { if (newFilesOnly) { @@ -79,8 +73,8 @@ class FileInputDStream[K: ClassManifest, V: ClassManifest, F <: NewInputFormat[K } } - val newFiles = fs.listStatus(path, newFilter) - logInfo("New files: " + newFiles.map(_.getPath).mkString(", ")) + val newFiles = fs.listStatus(path, newFilter).map(_.getPath.toString) + logInfo("New files: " + newFiles.mkString(", ")) if (newFiles.length > 0) { // Update the modification time and the files processed for that modification time if (lastModTime != newFilter.latestModTime) { @@ -89,9 +83,70 @@ class FileInputDStream[K: ClassManifest, V: ClassManifest, F <: NewInputFormat[K } lastModTimeFiles ++= newFilter.latestModTimeFiles } - val newRDD = new UnionRDD(ssc.sc, newFiles.map( - file => ssc.sc.newAPIHadoopFile[K, V, F](file.getPath.toString))) - Some(newRDD) + files += ((validTime, newFiles)) + Some(filesToRDD(newFiles)) + } + + /** Forget the old time-to-files mappings along with old RDDs */ + protected[streaming] override def forgetOldMetadata(time: Time) { + super.forgetOldMetadata(time) + val filesToBeRemoved = files.filter(_._1 <= (time - rememberDuration)) + files --= filesToBeRemoved.keys + logInfo("Forgot " + filesToBeRemoved.size + " files from " + this) + } + + /** Generate one RDD from an array of files */ + protected[streaming] def filesToRDD(files: Seq[String]): RDD[(K, V)] = { + new UnionRDD( + context.sparkContext, + files.map(file => context.sparkContext.newAPIHadoopFile[K, V, F](file)) + ) + } + + private def path: Path = { + if (path_ == null) path_ = new Path(directory) + path_ + } + + private def fs: FileSystem = { + if (fs_ == null) fs_ = path.getFileSystem(new Configuration()) + fs_ + } + + 
@throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream) { + logDebug(this.getClass().getSimpleName + ".readObject used") + ois.defaultReadObject() + generatedRDDs = new HashMap[Time, RDD[(K,V)]] () + files = new HashMap[Time, Array[String]] + } + + /** + * A custom version of the DStreamCheckpointData that stores names of + * Hadoop files as checkpoint data. + */ + private[streaming] + class FileInputDStreamCheckpointData extends DStreamCheckpointData(this) { + + def hadoopFiles = data.asInstanceOf[HashMap[Time, Array[String]]] + + override def update() { + hadoopFiles.clear() + hadoopFiles ++= files + } + + override def cleanup() { } + + override def restore() { + hadoopFiles.foreach { + case (time, files) => { + logInfo("Restoring Hadoop RDD for time " + time + " from files " + + files.mkString("[", ",", "]") ) + files + generatedRDDs += ((time, filesToRDD(files))) + } + } + } } } @@ -100,3 +155,4 @@ object FileInputDStream { def defaultFilter(path: Path): Boolean = !path.getName().startsWith(".") } + diff --git a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala index d7ba7a5d17..4f6204f205 100644 --- a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala @@ -214,10 +214,6 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { //Thread.sleep(100) } val startTime = System.currentTimeMillis() - /*while (output.size < expectedOutput.size && System.currentTimeMillis() - startTime < maxWaitTimeMillis) { - logInfo("output.size = " + output.size + ", expectedOutput.size = " + expectedOutput.size) - Thread.sleep(100) - }*/ Thread.sleep(1000) val timeTaken = System.currentTimeMillis() - startTime assert(timeTaken < maxWaitTimeMillis, "Operation timed out after " + timeTaken + " ms") @@ -226,11 +222,9 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { // Verify whether data received by Spark Streaming was as expected logInfo("--------------------------------") - logInfo("output.size = " + outputBuffer.size) - logInfo("output") + logInfo("output, size = " + outputBuffer.size) outputBuffer.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("expected output.size = " + expectedOutput.size) - logInfo("expected output") + logInfo("expected output, size = " + expectedOutput.size) expectedOutput.foreach(x => logInfo("[" + x.mkString(",") + "]")) logInfo("--------------------------------") @@ -256,8 +250,13 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { // Set up the streaming context and input streams var ssc = new StreamingContext(master, framework, batchDuration) ssc.checkpoint(checkpointDir, checkpointInterval) - val filestream = ssc.textFileStream(testDir.toString) - var outputStream = new TestOutputStream(filestream, new ArrayBuffer[Seq[String]]) + val fileStream = ssc.textFileStream(testDir.toString) + val outputBuffer = new ArrayBuffer[Seq[Int]] + // Reduced over a large window to ensure that recovery from master failure + // requires reprocessing of all the files seen before the failure + val reducedStream = fileStream.map(_.toInt) + .reduceByWindow(_ + _, batchDuration * 30, batchDuration) + var outputStream = new TestOutputStream(reducedStream, outputBuffer) ssc.registerOutputStream(outputStream) ssc.start() @@ -266,31 +265,56 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { Thread.sleep(1000) for (i <- Seq(1, 2, 3)) { 
FileUtils.writeStringToFile(new File(testDir, i.toString), i.toString + "\n") - Thread.sleep(100) + // wait to make sure that the file is written such that it gets shown in the file listings + Thread.sleep(500) clock.addToTime(batchDuration.milliseconds) + // wait to make sure that FileInputDStream picks up this file only and not any other file + Thread.sleep(500) } - Thread.sleep(500) logInfo("Output = " + outputStream.output.mkString(",")) - assert(outputStream.output.size > 0) + assert(outputStream.output.size > 0, "No files processed before restart") ssc.stop() + for (i <- Seq(4, 5, 6)) { + FileUtils.writeStringToFile(new File(testDir, i.toString), i.toString + "\n") + Thread.sleep(1000) + } + // Restart stream computation from checkpoint and create more files to see whether // they are being processed logInfo("*********** RESTARTING ************") ssc = new StreamingContext(checkpointDir) ssc.start() clock = ssc.scheduler.clock.asInstanceOf[ManualClock] - Thread.sleep(500) - for (i <- Seq(4, 5, 6)) { + for (i <- Seq(7, 8, 9)) { FileUtils.writeStringToFile(new File(testDir, i.toString), i.toString + "\n") - Thread.sleep(100) + Thread.sleep(500) clock.addToTime(batchDuration.milliseconds) + Thread.sleep(500) } - Thread.sleep(500) - outputStream = ssc.graph.getOutputStreams().head.asInstanceOf[TestOutputStream[String]] - logInfo("Output = " + outputStream.output.mkString(",")) - assert(outputStream.output.size > 0) + Thread.sleep(1000) + assert(outputStream.output.size > 0, "No files processed after restart") ssc.stop() + + // Append the new output to the old buffer + outputStream = ssc.graph.getOutputStreams().head.asInstanceOf[TestOutputStream[Int]] + outputBuffer ++= outputStream.output + + // Verify whether data received by Spark Streaming was as expected + val expectedOutput = Seq(1, 3, 6, 28, 36, 45) + logInfo("--------------------------------") + logInfo("output, size = " + outputBuffer.size) + outputBuffer.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("expected output, size = " + expectedOutput.size) + expectedOutput.foreach(x => logInfo("[" + x + "]")) + logInfo("--------------------------------") + + // Verify whether all the elements received are as expected + assert(outputBuffer.size === expectedOutput.size) + for (i <- 0 until outputBuffer.size) { + assert(outputBuffer(i).size === 1) + assert(outputBuffer(i).head === expectedOutput(i)) + } } } From 325297e5c31418f32deeb2a3cc52755094a11cea Mon Sep 17 00:00:00 2001 From: Mikhail Bautin Date: Tue, 22 Jan 2013 17:31:11 -0800 Subject: [PATCH 170/291] Add an Avro dependency to REPL to make it compile with Hadoop 2 --- pom.xml | 11 +++++++++++ repl/pom.xml | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/pom.xml b/pom.xml index 483b0f9595..3ea989a082 100644 --- a/pom.xml +++ b/pom.xml @@ -542,6 +542,17 @@ hadoop-client 2.0.0-mr1-cdh${cdh.version}
    + + + org.apache.avro + avro + 1.7.1.cloudera.2 + + + org.apache.avro + avro-ipc + 1.7.1.cloudera.2 +
    diff --git a/repl/pom.xml b/repl/pom.xml index 2fc9692969..2dc96beaf5 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -175,6 +175,16 @@ hadoop-client provided
    + + org.apache.avro + avro + provided + + + org.apache.avro + avro-ipc + provided +
    From 284993100022cc4bd43bf84a0be4dd91cf7a4ac0 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Tue, 22 Jan 2013 22:19:30 -0800 Subject: [PATCH 171/291] Eliminate CacheTracker. Replaces DAGScheduler's queries of CacheTracker with BlockManagerMaster queries. Adds CacheManager to locally coordinate computation of cached RDDs. --- core/src/main/scala/spark/CacheTracker.scala | 240 ------------------ core/src/main/scala/spark/RDD.scala | 2 +- core/src/main/scala/spark/SparkEnv.scala | 8 +- .../scala/spark/scheduler/DAGScheduler.scala | 24 +- .../scala/spark/storage/BlockManager.scala | 24 +- .../test/scala/spark/CacheTrackerSuite.scala | 131 ---------- 6 files changed, 18 insertions(+), 411 deletions(-) delete mode 100644 core/src/main/scala/spark/CacheTracker.scala delete mode 100644 core/src/test/scala/spark/CacheTrackerSuite.scala diff --git a/core/src/main/scala/spark/CacheTracker.scala b/core/src/main/scala/spark/CacheTracker.scala deleted file mode 100644 index 86ad737583..0000000000 --- a/core/src/main/scala/spark/CacheTracker.scala +++ /dev/null @@ -1,240 +0,0 @@ -package spark - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap -import scala.collection.mutable.HashSet - -import akka.actor._ -import akka.dispatch._ -import akka.pattern.ask -import akka.remote._ -import akka.util.Duration -import akka.util.Timeout -import akka.util.duration._ - -import spark.storage.BlockManager -import spark.storage.StorageLevel -import util.{TimeStampedHashSet, MetadataCleaner, TimeStampedHashMap} - -private[spark] sealed trait CacheTrackerMessage - -private[spark] case class AddedToCache(rddId: Int, partition: Int, host: String, size: Long = 0L) - extends CacheTrackerMessage -private[spark] case class DroppedFromCache(rddId: Int, partition: Int, host: String, size: Long = 0L) - extends CacheTrackerMessage -private[spark] case class MemoryCacheLost(host: String) extends CacheTrackerMessage -private[spark] case class RegisterRDD(rddId: Int, numPartitions: Int) extends CacheTrackerMessage -private[spark] case class SlaveCacheStarted(host: String, size: Long) extends CacheTrackerMessage -private[spark] case object GetCacheStatus extends CacheTrackerMessage -private[spark] case object GetCacheLocations extends CacheTrackerMessage -private[spark] case object StopCacheTracker extends CacheTrackerMessage - -private[spark] class CacheTrackerActor extends Actor with Logging { - // TODO: Should probably store (String, CacheType) tuples - private val locs = new TimeStampedHashMap[Int, Array[List[String]]] - - /** - * A map from the slave's host name to its cache size. - */ - private val slaveCapacity = new HashMap[String, Long] - private val slaveUsage = new HashMap[String, Long] - - private val metadataCleaner = new MetadataCleaner("CacheTrackerActor", locs.clearOldValues) - - private def getCacheUsage(host: String): Long = slaveUsage.getOrElse(host, 0L) - private def getCacheCapacity(host: String): Long = slaveCapacity.getOrElse(host, 0L) - private def getCacheAvailable(host: String): Long = getCacheCapacity(host) - getCacheUsage(host) - - def receive = { - case SlaveCacheStarted(host: String, size: Long) => - slaveCapacity.put(host, size) - slaveUsage.put(host, 0) - sender ! true - - case RegisterRDD(rddId: Int, numPartitions: Int) => - logInfo("Registering RDD " + rddId + " with " + numPartitions + " partitions") - locs(rddId) = Array.fill[List[String]](numPartitions)(Nil) - sender ! 
true - - case AddedToCache(rddId, partition, host, size) => - slaveUsage.put(host, getCacheUsage(host) + size) - locs(rddId)(partition) = host :: locs(rddId)(partition) - sender ! true - - case DroppedFromCache(rddId, partition, host, size) => - slaveUsage.put(host, getCacheUsage(host) - size) - // Do a sanity check to make sure usage is greater than 0. - locs(rddId)(partition) = locs(rddId)(partition).filterNot(_ == host) - sender ! true - - case MemoryCacheLost(host) => - logInfo("Memory cache lost on " + host) - for ((id, locations) <- locs) { - for (i <- 0 until locations.length) { - locations(i) = locations(i).filterNot(_ == host) - } - } - sender ! true - - case GetCacheLocations => - logInfo("Asked for current cache locations") - sender ! locs.map{case (rrdId, array) => (rrdId -> array.clone())} - - case GetCacheStatus => - val status = slaveCapacity.map { case (host, capacity) => - (host, capacity, getCacheUsage(host)) - }.toSeq - sender ! status - - case StopCacheTracker => - logInfo("Stopping CacheTrackerActor") - sender ! true - metadataCleaner.cancel() - context.stop(self) - } -} - -private[spark] class CacheTracker(actorSystem: ActorSystem, isMaster: Boolean, blockManager: BlockManager) - extends Logging { - - // Tracker actor on the master, or remote reference to it on workers - val ip: String = System.getProperty("spark.master.host", "localhost") - val port: Int = System.getProperty("spark.master.port", "7077").toInt - val actorName: String = "CacheTracker" - - val timeout = 10.seconds - - var trackerActor: ActorRef = if (isMaster) { - val actor = actorSystem.actorOf(Props[CacheTrackerActor], name = actorName) - logInfo("Registered CacheTrackerActor actor") - actor - } else { - val url = "akka://spark@%s:%s/user/%s".format(ip, port, actorName) - actorSystem.actorFor(url) - } - - // TODO: Consider removing this HashSet completely as locs CacheTrackerActor already - // keeps track of registered RDDs - val registeredRddIds = new TimeStampedHashSet[Int] - - // Remembers which splits are currently being loaded (on worker nodes) - val loading = new HashSet[String] - - val metadataCleaner = new MetadataCleaner("CacheTracker", registeredRddIds.clearOldValues) - - // Send a message to the trackerActor and get its result within a default timeout, or - // throw a SparkException if this fails. - def askTracker(message: Any): Any = { - try { - val future = trackerActor.ask(message)(timeout) - return Await.result(future, timeout) - } catch { - case e: Exception => - throw new SparkException("Error communicating with CacheTracker", e) - } - } - - // Send a one-way message to the trackerActor, to which we expect it to reply with true. - def communicate(message: Any) { - if (askTracker(message) != true) { - throw new SparkException("Error reply received from CacheTracker") - } - } - - // Registers an RDD (on master only) - def registerRDD(rddId: Int, numPartitions: Int) { - registeredRddIds.synchronized { - if (!registeredRddIds.contains(rddId)) { - logInfo("Registering RDD ID " + rddId + " with cache") - registeredRddIds += rddId - communicate(RegisterRDD(rddId, numPartitions)) - } - } - } - - // For BlockManager.scala only - def cacheLost(host: String) { - communicate(MemoryCacheLost(host)) - logInfo("CacheTracker successfully removed entries on " + host) - } - - // Get the usage status of slave caches. Each tuple in the returned sequence - // is in the form of (host name, capacity, usage). 
- def getCacheStatus(): Seq[(String, Long, Long)] = { - askTracker(GetCacheStatus).asInstanceOf[Seq[(String, Long, Long)]] - } - - // For BlockManager.scala only - def notifyFromBlockManager(t: AddedToCache) { - communicate(t) - } - - // Get a snapshot of the currently known locations - def getLocationsSnapshot(): HashMap[Int, Array[List[String]]] = { - askTracker(GetCacheLocations).asInstanceOf[HashMap[Int, Array[List[String]]]] - } - - // Gets or computes an RDD split - def getOrCompute[T](rdd: RDD[T], split: Split, context: TaskContext, storageLevel: StorageLevel) - : Iterator[T] = { - val key = "rdd_%d_%d".format(rdd.id, split.index) - logInfo("Cache key is " + key) - blockManager.get(key) match { - case Some(cachedValues) => - // Split is in cache, so just return its values - logInfo("Found partition in cache!") - return cachedValues.asInstanceOf[Iterator[T]] - - case None => - // Mark the split as loading (unless someone else marks it first) - loading.synchronized { - if (loading.contains(key)) { - logInfo("Loading contains " + key + ", waiting...") - while (loading.contains(key)) { - try {loading.wait()} catch {case _ =>} - } - logInfo("Loading no longer contains " + key + ", so returning cached result") - // See whether someone else has successfully loaded it. The main way this would fail - // is for the RDD-level cache eviction policy if someone else has loaded the same RDD - // partition but we didn't want to make space for it. However, that case is unlikely - // because it's unlikely that two threads would work on the same RDD partition. One - // downside of the current code is that threads wait serially if this does happen. - blockManager.get(key) match { - case Some(values) => - return values.asInstanceOf[Iterator[T]] - case None => - logInfo("Whoever was loading " + key + " failed; we'll try it ourselves") - loading.add(key) - } - } else { - loading.add(key) - } - } - try { - // If we got here, we have to load the split - val elements = new ArrayBuffer[Any] - logInfo("Computing partition " + split) - elements ++= rdd.compute(split, context) - // Try to put this block in the blockManager - blockManager.put(key, elements, storageLevel, true) - return elements.iterator.asInstanceOf[Iterator[T]] - } finally { - loading.synchronized { - loading.remove(key) - loading.notifyAll() - } - } - } - } - - // Called by the Cache to report that an entry has been dropped from it - def dropEntry(rddId: Int, partition: Int) { - communicate(DroppedFromCache(rddId, partition, Utils.localHostName())) - } - - def stop() { - communicate(StopCacheTracker) - registeredRddIds.clear() - trackerActor = null - } -} diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index e0d2eabb1d..c79f34342f 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -176,7 +176,7 @@ abstract class RDD[T: ClassManifest]( if (isCheckpointed) { checkpointData.get.iterator(split, context) } else if (storageLevel != StorageLevel.NONE) { - SparkEnv.get.cacheTracker.getOrCompute[T](this, split, context, storageLevel) + SparkEnv.get.cacheManager.getOrCompute(this, split, context, storageLevel) } else { compute(split, context) } diff --git a/core/src/main/scala/spark/SparkEnv.scala b/core/src/main/scala/spark/SparkEnv.scala index 41441720a7..a080194980 100644 --- a/core/src/main/scala/spark/SparkEnv.scala +++ b/core/src/main/scala/spark/SparkEnv.scala @@ -22,7 +22,7 @@ class SparkEnv ( val actorSystem: ActorSystem, val serializer: Serializer, val 
closureSerializer: Serializer, - val cacheTracker: CacheTracker, + val cacheManager: CacheManager, val mapOutputTracker: MapOutputTracker, val shuffleFetcher: ShuffleFetcher, val broadcastManager: BroadcastManager, @@ -39,7 +39,6 @@ class SparkEnv ( def stop() { httpFileServer.stop() mapOutputTracker.stop() - cacheTracker.stop() shuffleFetcher.stop() broadcastManager.stop() blockManager.stop() @@ -100,8 +99,7 @@ object SparkEnv extends Logging { val closureSerializer = instantiateClass[Serializer]( "spark.closure.serializer", "spark.JavaSerializer") - val cacheTracker = new CacheTracker(actorSystem, isMaster, blockManager) - blockManager.cacheTracker = cacheTracker + val cacheManager = new CacheManager(blockManager) val mapOutputTracker = new MapOutputTracker(actorSystem, isMaster) @@ -122,7 +120,7 @@ object SparkEnv extends Logging { actorSystem, serializer, closureSerializer, - cacheTracker, + cacheManager, mapOutputTracker, shuffleFetcher, broadcastManager, diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index 59f2099e91..03d173ac3b 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -69,8 +69,8 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with var cacheLocs = new HashMap[Int, Array[List[String]]] val env = SparkEnv.get - val cacheTracker = env.cacheTracker val mapOutputTracker = env.mapOutputTracker + val blockManagerMaster = env.blockManager.master val deadHosts = new HashSet[String] // TODO: The code currently assumes these can't come back; // that's not going to be a realistic assumption in general @@ -95,11 +95,17 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with }.start() def getCacheLocs(rdd: RDD[_]): Array[List[String]] = { + if (!cacheLocs.contains(rdd.id)) { + val blockIds = rdd.splits.indices.map(index=> "rdd_%d_%d".format(rdd.id, index)).toArray + cacheLocs(rdd.id) = blockManagerMaster.getLocations(blockIds).map { + locations => locations.map(_.ip).toList + }.toArray + } cacheLocs(rdd.id) } - def updateCacheLocs() { - cacheLocs = cacheTracker.getLocationsSnapshot() + def clearCacheLocs() { + cacheLocs.clear } /** @@ -126,7 +132,6 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with // Kind of ugly: need to register RDDs with the cache and map output tracker here // since we can't do it in the RDD constructor because # of splits is unknown logInfo("Registering RDD " + rdd.id + " (" + rdd.origin + ")") - cacheTracker.registerRDD(rdd.id, rdd.splits.size) if (shuffleDep != None) { mapOutputTracker.registerShuffle(shuffleDep.get.shuffleId, rdd.splits.size) } @@ -148,8 +153,6 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with visited += r // Kind of ugly: need to register RDDs with the cache here since // we can't do it in its constructor because # of splits is unknown - logInfo("Registering parent RDD " + r.id + " (" + r.origin + ")") - cacheTracker.registerRDD(r.id, r.splits.size) for (dep <- r.dependencies) { dep match { case shufDep: ShuffleDependency[_,_] => @@ -250,7 +253,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with val runId = nextRunId.getAndIncrement() val finalStage = newStage(finalRDD, None, runId) val job = new ActiveJob(runId, finalStage, func, partitions, callSite, listener) - updateCacheLocs() + clearCacheLocs() logInfo("Got job " + job.runId + " 
(" + callSite + ") with " + partitions.length + " output partitions") logInfo("Final stage: " + finalStage + " (" + finalStage.origin + ")") @@ -293,7 +296,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with // on the failed node. if (failed.size > 0 && time > lastFetchFailureTime + RESUBMIT_TIMEOUT) { logInfo("Resubmitting failed stages") - updateCacheLocs() + clearCacheLocs() val failed2 = failed.toArray failed.clear() for (stage <- failed2.sortBy(_.priority)) { @@ -443,7 +446,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with stage.shuffleDep.get.shuffleId, stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray) } - updateCacheLocs() + clearCacheLocs() if (stage.outputLocs.count(_ == Nil) != 0) { // Some tasks had failed; let's resubmit this stage // TODO: Lower-level scheduler should also deal with this @@ -519,8 +522,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with val locs = stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray mapOutputTracker.registerMapOutputs(shuffleId, locs, true) } - cacheTracker.cacheLost(host) - updateCacheLocs() + clearCacheLocs() } } diff --git a/core/src/main/scala/spark/storage/BlockManager.scala b/core/src/main/scala/spark/storage/BlockManager.scala index 7a8ac10cdd..e049565f48 100644 --- a/core/src/main/scala/spark/storage/BlockManager.scala +++ b/core/src/main/scala/spark/storage/BlockManager.scala @@ -16,7 +16,7 @@ import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream -import spark.{CacheTracker, Logging, SizeEstimator, SparkEnv, SparkException, Utils} +import spark.{Logging, SizeEstimator, SparkEnv, SparkException, Utils} import spark.network._ import spark.serializer.Serializer import spark.util.{ByteBufferInputStream, IdGenerator, MetadataCleaner, TimeStampedHashMap} @@ -71,9 +71,6 @@ class BlockManager( val connectionManagerId = connectionManager.id val blockManagerId = new BlockManagerId(connectionManagerId.host, connectionManagerId.port) - // TODO: This will be removed after cacheTracker is removed from the code base. - var cacheTracker: CacheTracker = null - // Max megabytes of data to keep in flight per reducer (to avoid over-allocating memory // for receiving shuffle outputs) val maxBytesInFlight = @@ -662,10 +659,6 @@ class BlockManager( BlockManager.dispose(bytesAfterPut) - // TODO: This code will be removed when CacheTracker is gone. - if (blockId.startsWith("rdd")) { - notifyCacheTracker(blockId) - } logDebug("Put block " + blockId + " took " + Utils.getUsedTimeMs(startTimeMs)) return size @@ -733,11 +726,6 @@ class BlockManager( } } - // TODO: This code will be removed when CacheTracker is gone. - if (blockId.startsWith("rdd")) { - notifyCacheTracker(blockId) - } - // If replication had started, then wait for it to finish if (level.replication > 1) { if (replicationFuture == null) { @@ -780,16 +768,6 @@ class BlockManager( } } - // TODO: This code will be removed when CacheTracker is gone. - private def notifyCacheTracker(key: String) { - if (cacheTracker != null) { - val rddInfo = key.split("_") - val rddId: Int = rddInfo(1).toInt - val partition: Int = rddInfo(2).toInt - cacheTracker.notifyFromBlockManager(spark.AddedToCache(rddId, partition, host)) - } - } - /** * Read a block consisting of a single object. 
*/ diff --git a/core/src/test/scala/spark/CacheTrackerSuite.scala b/core/src/test/scala/spark/CacheTrackerSuite.scala deleted file mode 100644 index 467605981b..0000000000 --- a/core/src/test/scala/spark/CacheTrackerSuite.scala +++ /dev/null @@ -1,131 +0,0 @@ -package spark - -import org.scalatest.FunSuite - -import scala.collection.mutable.HashMap - -import akka.actor._ -import akka.dispatch._ -import akka.pattern.ask -import akka.remote._ -import akka.util.Duration -import akka.util.Timeout -import akka.util.duration._ - -class CacheTrackerSuite extends FunSuite { - // Send a message to an actor and wait for a reply, in a blocking manner - private def ask(actor: ActorRef, message: Any): Any = { - try { - val timeout = 10.seconds - val future = actor.ask(message)(timeout) - return Await.result(future, timeout) - } catch { - case e: Exception => - throw new SparkException("Error communicating with actor", e) - } - } - - test("CacheTrackerActor slave initialization & cache status") { - //System.setProperty("spark.master.port", "1345") - val initialSize = 2L << 20 - - val actorSystem = ActorSystem("test") - val tracker = actorSystem.actorOf(Props[CacheTrackerActor]) - - assert(ask(tracker, SlaveCacheStarted("host001", initialSize)) === true) - - assert(ask(tracker, GetCacheStatus) === Seq(("host001", 2097152L, 0L))) - - assert(ask(tracker, StopCacheTracker) === true) - - actorSystem.shutdown() - actorSystem.awaitTermination() - } - - test("RegisterRDD") { - //System.setProperty("spark.master.port", "1345") - val initialSize = 2L << 20 - - val actorSystem = ActorSystem("test") - val tracker = actorSystem.actorOf(Props[CacheTrackerActor]) - - assert(ask(tracker, SlaveCacheStarted("host001", initialSize)) === true) - - assert(ask(tracker, RegisterRDD(1, 3)) === true) - assert(ask(tracker, RegisterRDD(2, 1)) === true) - - assert(getCacheLocations(tracker) === Map(1 -> List(Nil, Nil, Nil), 2 -> List(Nil))) - - assert(ask(tracker, StopCacheTracker) === true) - - actorSystem.shutdown() - actorSystem.awaitTermination() - } - - test("AddedToCache") { - //System.setProperty("spark.master.port", "1345") - val initialSize = 2L << 20 - - val actorSystem = ActorSystem("test") - val tracker = actorSystem.actorOf(Props[CacheTrackerActor]) - - assert(ask(tracker, SlaveCacheStarted("host001", initialSize)) === true) - - assert(ask(tracker, RegisterRDD(1, 2)) === true) - assert(ask(tracker, RegisterRDD(2, 1)) === true) - - assert(ask(tracker, AddedToCache(1, 0, "host001", 2L << 15)) === true) - assert(ask(tracker, AddedToCache(1, 1, "host001", 2L << 11)) === true) - assert(ask(tracker, AddedToCache(2, 0, "host001", 3L << 10)) === true) - - assert(ask(tracker, GetCacheStatus) === Seq(("host001", 2097152L, 72704L))) - - assert(getCacheLocations(tracker) === - Map(1 -> List(List("host001"), List("host001")), 2 -> List(List("host001")))) - - assert(ask(tracker, StopCacheTracker) === true) - - actorSystem.shutdown() - actorSystem.awaitTermination() - } - - test("DroppedFromCache") { - //System.setProperty("spark.master.port", "1345") - val initialSize = 2L << 20 - - val actorSystem = ActorSystem("test") - val tracker = actorSystem.actorOf(Props[CacheTrackerActor]) - - assert(ask(tracker, SlaveCacheStarted("host001", initialSize)) === true) - - assert(ask(tracker, RegisterRDD(1, 2)) === true) - assert(ask(tracker, RegisterRDD(2, 1)) === true) - - assert(ask(tracker, AddedToCache(1, 0, "host001", 2L << 15)) === true) - assert(ask(tracker, AddedToCache(1, 1, "host001", 2L << 11)) === true) - assert(ask(tracker, 
AddedToCache(2, 0, "host001", 3L << 10)) === true) - - assert(ask(tracker, GetCacheStatus) === Seq(("host001", 2097152L, 72704L))) - assert(getCacheLocations(tracker) === - Map(1 -> List(List("host001"), List("host001")), 2 -> List(List("host001")))) - - assert(ask(tracker, DroppedFromCache(1, 1, "host001", 2L << 11)) === true) - - assert(ask(tracker, GetCacheStatus) === Seq(("host001", 2097152L, 68608L))) - assert(getCacheLocations(tracker) === - Map(1 -> List(List("host001"),List()), 2 -> List(List("host001")))) - - assert(ask(tracker, StopCacheTracker) === true) - - actorSystem.shutdown() - actorSystem.awaitTermination() - } - - /** - * Helper function to get cacheLocations from CacheTracker - */ - def getCacheLocations(tracker: ActorRef): HashMap[Int, List[List[String]]] = { - val answer = ask(tracker, GetCacheLocations).asInstanceOf[HashMap[Int, Array[List[String]]]] - answer.map { case (i, arr) => (i, arr.toList) } - } -} From 43e9ff959645e533bcfa0a5c31e62e32c7e9d0a6 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Tue, 22 Jan 2013 22:47:26 -0800 Subject: [PATCH 172/291] Add test for driver hanging on exit (SPARK-530). --- core/src/test/scala/spark/DriverSuite.scala | 31 +++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 core/src/test/scala/spark/DriverSuite.scala diff --git a/core/src/test/scala/spark/DriverSuite.scala b/core/src/test/scala/spark/DriverSuite.scala new file mode 100644 index 0000000000..70a7c8bc2f --- /dev/null +++ b/core/src/test/scala/spark/DriverSuite.scala @@ -0,0 +1,31 @@ +package spark + +import java.io.File + +import org.scalatest.FunSuite +import org.scalatest.concurrent.Timeouts +import org.scalatest.prop.TableDrivenPropertyChecks._ +import org.scalatest.time.SpanSugar._ + +class DriverSuite extends FunSuite with Timeouts { + test("driver should exit after finishing") { + // Regression test for SPARK-530: "Spark driver process doesn't exit after finishing" + val masters = Table(("master"), ("local"), ("local-cluster[2,1,512]")) + forAll(masters) { (master: String) => + failAfter(10 seconds) { + Utils.execute(Seq("./run", "spark.DriverWithoutCleanup", master), new File(System.getenv("SPARK_HOME"))) + } + } + } +} + +/** + * Program that creates a Spark driver but doesn't call SparkContext.stop() or + * Sys.exit() after finishing. + */ +object DriverWithoutCleanup { + def main(args: Array[String]) { + val sc = new SparkContext(args(0), "DriverWithoutCleanup") + sc.parallelize(1 to 100, 4).count() + } +} \ No newline at end of file From bacade6caf7527737dc6f02b1c2ca9114e02d8bc Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Tue, 22 Jan 2013 22:55:26 -0800 Subject: [PATCH 173/291] Modified BlockManagerId API to ensure zero duplicate objects. Fixed BlockManagerId testcase in BlockManagerTestSuite. 
--- .../scala/spark/scheduler/MapStatus.scala | 2 +- .../scala/spark/storage/BlockManager.scala | 2 +- .../scala/spark/storage/BlockManagerId.scala | 33 +++++++++++++++---- .../spark/storage/BlockManagerMessages.scala | 3 +- .../scala/spark/MapOutputTrackerSuite.scala | 22 ++++++------- .../spark/storage/BlockManagerSuite.scala | 18 +++++----- 6 files changed, 51 insertions(+), 29 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/MapStatus.scala b/core/src/main/scala/spark/scheduler/MapStatus.scala index 4532d9497f..fae643f3a8 100644 --- a/core/src/main/scala/spark/scheduler/MapStatus.scala +++ b/core/src/main/scala/spark/scheduler/MapStatus.scala @@ -20,7 +20,7 @@ private[spark] class MapStatus(var address: BlockManagerId, var compressedSizes: } def readExternal(in: ObjectInput) { - address = new BlockManagerId(in) + address = BlockManagerId(in) compressedSizes = new Array[Byte](in.readInt()) in.readFully(compressedSizes) } diff --git a/core/src/main/scala/spark/storage/BlockManager.scala b/core/src/main/scala/spark/storage/BlockManager.scala index 7a8ac10cdd..596a69c583 100644 --- a/core/src/main/scala/spark/storage/BlockManager.scala +++ b/core/src/main/scala/spark/storage/BlockManager.scala @@ -69,7 +69,7 @@ class BlockManager( implicit val futureExecContext = connectionManager.futureExecContext val connectionManagerId = connectionManager.id - val blockManagerId = new BlockManagerId(connectionManagerId.host, connectionManagerId.port) + val blockManagerId = BlockManagerId(connectionManagerId.host, connectionManagerId.port) // TODO: This will be removed after cacheTracker is removed from the code base. var cacheTracker: CacheTracker = null diff --git a/core/src/main/scala/spark/storage/BlockManagerId.scala b/core/src/main/scala/spark/storage/BlockManagerId.scala index 488679f049..26c98f2ac8 100644 --- a/core/src/main/scala/spark/storage/BlockManagerId.scala +++ b/core/src/main/scala/spark/storage/BlockManagerId.scala @@ -3,20 +3,35 @@ package spark.storage import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} import java.util.concurrent.ConcurrentHashMap +/** + * This class represents a unique identifier for a BlockManager. + * The first two constructors of this class are made private to ensure that + * BlockManagerId objects can be created only using the factory method in + * [[spark.storage.BlockManager$]]. This allows de-duplication of id objects. + * Also, constructor parameters are private to ensure that parameters cannot + * be modified from outside this class.
+ */ +private[spark] class BlockManagerId private ( + private var ip_ : String, + private var port_ : Int + ) extends Externalizable { + + private def this(in: ObjectInput) = this(in.readUTF(), in.readInt()) -private[spark] class BlockManagerId(var ip: String, var port: Int) extends Externalizable { def this() = this(null, 0) // For deserialization only - def this(in: ObjectInput) = this(in.readUTF(), in.readInt()) + def ip = ip_ + + def port = port_ override def writeExternal(out: ObjectOutput) { - out.writeUTF(ip) - out.writeInt(port) + out.writeUTF(ip_) + out.writeInt(port_) } override def readExternal(in: ObjectInput) { - ip = in.readUTF() - port = in.readInt() + ip_ = in.readUTF() + port_ = in.readInt() } @throws(classOf[IOException]) @@ -35,6 +50,12 @@ private[spark] class BlockManagerId(var ip: String, var port: Int) extends Exter private[spark] object BlockManagerId { + def apply(ip: String, port: Int) = + getCachedBlockManagerId(new BlockManagerId(ip, port)) + + def apply(in: ObjectInput) = + getCachedBlockManagerId(new BlockManagerId(in)) + val blockManagerIdCache = new ConcurrentHashMap[BlockManagerId, BlockManagerId]() def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId = { diff --git a/core/src/main/scala/spark/storage/BlockManagerMessages.scala b/core/src/main/scala/spark/storage/BlockManagerMessages.scala index d73a9b790f..7437fc63eb 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMessages.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMessages.scala @@ -54,8 +54,7 @@ class UpdateBlockInfo( } override def readExternal(in: ObjectInput) { - blockManagerId = new BlockManagerId() - blockManagerId.readExternal(in) + blockManagerId = BlockManagerId(in) blockId = in.readUTF() storageLevel = new StorageLevel() storageLevel.readExternal(in) diff --git a/core/src/test/scala/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/spark/MapOutputTrackerSuite.scala index d3dd3a8fa4..095f415978 100644 --- a/core/src/test/scala/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/spark/MapOutputTrackerSuite.scala @@ -47,13 +47,13 @@ class MapOutputTrackerSuite extends FunSuite with BeforeAndAfter { val compressedSize10000 = MapOutputTracker.compressSize(10000L) val size1000 = MapOutputTracker.decompressSize(compressedSize1000) val size10000 = MapOutputTracker.decompressSize(compressedSize10000) - tracker.registerMapOutput(10, 0, new MapStatus(new BlockManagerId("hostA", 1000), + tracker.registerMapOutput(10, 0, new MapStatus(BlockManagerId("hostA", 1000), Array(compressedSize1000, compressedSize10000))) - tracker.registerMapOutput(10, 1, new MapStatus(new BlockManagerId("hostB", 1000), + tracker.registerMapOutput(10, 1, new MapStatus(BlockManagerId("hostB", 1000), Array(compressedSize10000, compressedSize1000))) val statuses = tracker.getServerStatuses(10, 0) - assert(statuses.toSeq === Seq((new BlockManagerId("hostA", 1000), size1000), - (new BlockManagerId("hostB", 1000), size10000))) + assert(statuses.toSeq === Seq((BlockManagerId("hostA", 1000), size1000), + (BlockManagerId("hostB", 1000), size10000))) tracker.stop() } @@ -65,14 +65,14 @@ class MapOutputTrackerSuite extends FunSuite with BeforeAndAfter { val compressedSize10000 = MapOutputTracker.compressSize(10000L) val size1000 = MapOutputTracker.decompressSize(compressedSize1000) val size10000 = MapOutputTracker.decompressSize(compressedSize10000) - tracker.registerMapOutput(10, 0, new MapStatus(new BlockManagerId("hostA", 1000), + tracker.registerMapOutput(10, 0, new 
MapStatus(BlockManagerId("hostA", 1000), Array(compressedSize1000, compressedSize1000, compressedSize1000))) - tracker.registerMapOutput(10, 1, new MapStatus(new BlockManagerId("hostB", 1000), + tracker.registerMapOutput(10, 1, new MapStatus(BlockManagerId("hostB", 1000), Array(compressedSize10000, compressedSize1000, compressedSize1000))) // As if we had two simulatenous fetch failures - tracker.unregisterMapOutput(10, 0, new BlockManagerId("hostA", 1000)) - tracker.unregisterMapOutput(10, 0, new BlockManagerId("hostA", 1000)) + tracker.unregisterMapOutput(10, 0, BlockManagerId("hostA", 1000)) + tracker.unregisterMapOutput(10, 0, BlockManagerId("hostA", 1000)) // The remaining reduce task might try to grab the output dispite the shuffle failure; // this should cause it to fail, and the scheduler will ignore the failure due to the @@ -95,13 +95,13 @@ class MapOutputTrackerSuite extends FunSuite with BeforeAndAfter { val compressedSize1000 = MapOutputTracker.compressSize(1000L) val size1000 = MapOutputTracker.decompressSize(compressedSize1000) masterTracker.registerMapOutput(10, 0, new MapStatus( - new BlockManagerId("hostA", 1000), Array(compressedSize1000))) + BlockManagerId("hostA", 1000), Array(compressedSize1000))) masterTracker.incrementGeneration() slaveTracker.updateGeneration(masterTracker.getGeneration) assert(slaveTracker.getServerStatuses(10, 0).toSeq === - Seq((new BlockManagerId("hostA", 1000), size1000))) + Seq((BlockManagerId("hostA", 1000), size1000))) - masterTracker.unregisterMapOutput(10, 0, new BlockManagerId("hostA", 1000)) + masterTracker.unregisterMapOutput(10, 0, BlockManagerId("hostA", 1000)) masterTracker.incrementGeneration() slaveTracker.updateGeneration(masterTracker.getGeneration) intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } diff --git a/core/src/test/scala/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/spark/storage/BlockManagerSuite.scala index 8f86e3170e..a33d3324ba 100644 --- a/core/src/test/scala/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/spark/storage/BlockManagerSuite.scala @@ -82,16 +82,18 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("BlockManagerId object caching") { - val id1 = new StorageLevel(false, false, false, 3) - val id2 = new StorageLevel(false, false, false, 3) + val id1 = BlockManagerId("XXX", 1) + val id2 = BlockManagerId("XXX", 1) // this should return the same object as id1 + assert(id2 === id1, "id2 is not same as id1") + assert(id2.eq(id1), "id2 is not the same object as id1") val bytes1 = spark.Utils.serialize(id1) - val id1_ = spark.Utils.deserialize[StorageLevel](bytes1) + val id1_ = spark.Utils.deserialize[BlockManagerId](bytes1) val bytes2 = spark.Utils.serialize(id2) - val id2_ = spark.Utils.deserialize[StorageLevel](bytes2) - assert(id1_ === id1, "Deserialized id1 not same as original id1") - assert(id2_ === id2, "Deserialized id2 not same as original id1") - assert(id1_ === id2_, "Deserialized id1 not same as deserialized id2") - assert(id2_.eq(id1_), "Deserialized id2 not the same object as deserialized level1") + val id2_ = spark.Utils.deserialize[BlockManagerId](bytes2) + assert(id1_ === id1, "Deserialized id1 is not same as original id1") + assert(id1_.eq(id1), "Deserialized id1 is not the same object as original id1") + assert(id2_ === id2, "Deserialized id2 is not same as original id2") + assert(id2_.eq(id1), "Deserialized id2 is not the same object as original id1") } test("master + 1 manager interaction") { From 
5e11f1e51f17113abb8d3a5bc261af5ba5ffce94 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Tue, 22 Jan 2013 23:42:53 -0800 Subject: [PATCH 174/291] Modified StorageLevel API to ensure zero duplicate objects. --- .../scala/spark/storage/BlockManager.scala | 5 +- .../scala/spark/storage/BlockMessage.scala | 2 +- .../scala/spark/storage/StorageLevel.scala | 47 ++++++++++++------- .../spark/storage/BlockManagerSuite.scala | 16 +++++-- 4 files changed, 44 insertions(+), 26 deletions(-) diff --git a/core/src/main/scala/spark/storage/BlockManager.scala b/core/src/main/scala/spark/storage/BlockManager.scala index 596a69c583..ca7eb13ec8 100644 --- a/core/src/main/scala/spark/storage/BlockManager.scala +++ b/core/src/main/scala/spark/storage/BlockManager.scala @@ -191,7 +191,7 @@ class BlockManager( case level => val inMem = level.useMemory && memoryStore.contains(blockId) val onDisk = level.useDisk && diskStore.contains(blockId) - val storageLevel = new StorageLevel(onDisk, inMem, level.deserialized, level.replication) + val storageLevel = StorageLevel(onDisk, inMem, level.deserialized, level.replication) val memSize = if (inMem) memoryStore.getSize(blockId) else 0L val diskSize = if (onDisk) diskStore.getSize(blockId) else 0L (storageLevel, memSize, diskSize, info.tellMaster) @@ -760,8 +760,7 @@ class BlockManager( */ var cachedPeers: Seq[BlockManagerId] = null private def replicate(blockId: String, data: ByteBuffer, level: StorageLevel) { - val tLevel: StorageLevel = - new StorageLevel(level.useDisk, level.useMemory, level.deserialized, 1) + val tLevel = StorageLevel(level.useDisk, level.useMemory, level.deserialized, 1) if (cachedPeers == null) { cachedPeers = master.getPeers(blockManagerId, level.replication - 1) } diff --git a/core/src/main/scala/spark/storage/BlockMessage.scala b/core/src/main/scala/spark/storage/BlockMessage.scala index 3f234df654..30d7500e01 100644 --- a/core/src/main/scala/spark/storage/BlockMessage.scala +++ b/core/src/main/scala/spark/storage/BlockMessage.scala @@ -64,7 +64,7 @@ private[spark] class BlockMessage() { val booleanInt = buffer.getInt() val replication = buffer.getInt() - level = new StorageLevel(booleanInt, replication) + level = StorageLevel(booleanInt, replication) val dataLength = buffer.getInt() data = ByteBuffer.allocate(dataLength) diff --git a/core/src/main/scala/spark/storage/StorageLevel.scala b/core/src/main/scala/spark/storage/StorageLevel.scala index e3544e5aae..f2535ae5ae 100644 --- a/core/src/main/scala/spark/storage/StorageLevel.scala +++ b/core/src/main/scala/spark/storage/StorageLevel.scala @@ -7,25 +7,30 @@ import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} * whether to drop the RDD to disk if it falls out of memory, whether to keep the data in memory * in a serialized format, and whether to replicate the RDD partitions on multiple nodes. * The [[spark.storage.StorageLevel$]] singleton object contains some static constants for - * commonly useful storage levels. + * commonly useful storage levels. The recommended method to create your own storage level + * object is to use `StorageLevel.apply(...)` from the singleton object. */ class StorageLevel( - var useDisk: Boolean, - var useMemory: Boolean, - var deserialized: Boolean, - var replication: Int = 1) + private var useDisk_ : Boolean, + private var useMemory_ : Boolean, + private var deserialized_ : Boolean, + private var replication_ : Int = 1) extends Externalizable { // TODO: Also add fields for caching priority, dataset ID, and flushing. 
- - assert(replication < 40, "Replication restricted to be less than 40 for calculating hashcodes") - - def this(flags: Int, replication: Int) { + private def this(flags: Int, replication: Int) { this((flags & 4) != 0, (flags & 2) != 0, (flags & 1) != 0, replication) } def this() = this(false, true, false) // For deserialization + def useDisk = useDisk_ + def useMemory = useMemory_ + def deserialized = deserialized_ + def replication = replication_ + + assert(replication < 40, "Replication restricted to be less than 40 for calculating hashcodes") + override def clone(): StorageLevel = new StorageLevel( this.useDisk, this.useMemory, this.deserialized, this.replication) @@ -43,13 +48,13 @@ class StorageLevel( def toInt: Int = { var ret = 0 - if (useDisk) { + if (useDisk_) { ret |= 4 } - if (useMemory) { + if (useMemory_) { ret |= 2 } - if (deserialized) { + if (deserialized_) { ret |= 1 } return ret @@ -57,15 +62,15 @@ class StorageLevel( override def writeExternal(out: ObjectOutput) { out.writeByte(toInt) - out.writeByte(replication) + out.writeByte(replication_) } override def readExternal(in: ObjectInput) { val flags = in.readByte() - useDisk = (flags & 4) != 0 - useMemory = (flags & 2) != 0 - deserialized = (flags & 1) != 0 - replication = in.readByte() + useDisk_ = (flags & 4) != 0 + useMemory_ = (flags & 2) != 0 + deserialized_ = (flags & 1) != 0 + replication_ = in.readByte() } @throws(classOf[IOException]) @@ -91,6 +96,14 @@ object StorageLevel { val MEMORY_AND_DISK_SER = new StorageLevel(true, true, false) val MEMORY_AND_DISK_SER_2 = new StorageLevel(true, true, false, 2) + /** Create a new StorageLevel object */ + def apply(useDisk: Boolean, useMemory: Boolean, deserialized: Boolean, replication: Int = 1) = + getCachedStorageLevel(new StorageLevel(useDisk, useMemory, deserialized, replication)) + + /** Create a new StorageLevel object from its integer representation */ + def apply(flags: Int, replication: Int) = + getCachedStorageLevel(new StorageLevel(flags, replication)) + private[spark] val storageLevelCache = new java.util.concurrent.ConcurrentHashMap[StorageLevel, StorageLevel]() diff --git a/core/src/test/scala/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/spark/storage/BlockManagerSuite.scala index a33d3324ba..a1aeb12f25 100644 --- a/core/src/test/scala/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/spark/storage/BlockManagerSuite.scala @@ -69,23 +69,29 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("StorageLevel object caching") { - val level1 = new StorageLevel(false, false, false, 3) - val level2 = new StorageLevel(false, false, false, 3) + val level1 = StorageLevel(false, false, false, 3) + val level2 = StorageLevel(false, false, false, 3) // this should return the same object as level1 + val level3 = StorageLevel(false, false, false, 2) // this should return a different object + assert(level2 === level1, "level2 is not same as level1") + assert(level2.eq(level1), "level2 is not the same object as level1") + assert(level3 != level1, "level3 is same as level1") val bytes1 = spark.Utils.serialize(level1) val level1_ = spark.Utils.deserialize[StorageLevel](bytes1) val bytes2 = spark.Utils.serialize(level2) val level2_ = spark.Utils.deserialize[StorageLevel](bytes2) assert(level1_ === level1, "Deserialized level1 not same as original level1") - assert(level2_ === level2, "Deserialized level2 not same as original level1") - assert(level1_ === level2_, "Deserialized level1 not same as deserialized 
level2") - assert(level2_.eq(level1_), "Deserialized level2 not the same object as deserialized level1") + assert(level1_.eq(level1), "Deserialized level1 not the same object as original level2") + assert(level2_ === level2, "Deserialized level2 not same as original level2") + assert(level2_.eq(level1), "Deserialized level2 not the same object as original level1") } test("BlockManagerId object caching") { val id1 = BlockManagerId("XXX", 1) val id2 = BlockManagerId("XXX", 1) // this should return the same object as id1 + val id3 = BlockManagerId("XXX", 2) // this should return a different object assert(id2 === id1, "id2 is not same as id1") assert(id2.eq(id1), "id2 is not the same object as id1") + assert(id3 != id1, "id3 is same as id1") val bytes1 = spark.Utils.serialize(id1) val id1_ = spark.Utils.deserialize[BlockManagerId](bytes1) val bytes2 = spark.Utils.serialize(id2) From 155f31398dc83ecb88b4b3e07849a2a8a0a6592f Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Wed, 23 Jan 2013 01:10:26 -0800 Subject: [PATCH 175/291] Made StorageLevel constructor private, and added StorageLevels.create() to the Java API. Updates scala and java programming guides. --- core/src/main/scala/spark/api/java/StorageLevels.java | 11 +++++++++++ core/src/main/scala/spark/storage/StorageLevel.scala | 6 +++--- docs/java-programming-guide.md | 3 ++- docs/scala-programming-guide.md | 3 ++- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/spark/api/java/StorageLevels.java b/core/src/main/scala/spark/api/java/StorageLevels.java index 722af3c06c..5e5845ac3a 100644 --- a/core/src/main/scala/spark/api/java/StorageLevels.java +++ b/core/src/main/scala/spark/api/java/StorageLevels.java @@ -17,4 +17,15 @@ public class StorageLevels { public static final StorageLevel MEMORY_AND_DISK_2 = new StorageLevel(true, true, true, 2); public static final StorageLevel MEMORY_AND_DISK_SER = new StorageLevel(true, true, false, 1); public static final StorageLevel MEMORY_AND_DISK_SER_2 = new StorageLevel(true, true, false, 2); + + /** + * Create a new StorageLevel object. + * @param useDisk saved to disk, if true + * @param useMemory saved to memory, if true + * @param deserialized saved as deserialized objects, if true + * @param replication replication factor + */ + public static StorageLevel create(boolean useDisk, boolean useMemory, boolean deserialized, int replication) { + return StorageLevel.apply(useDisk, useMemory, deserialized, replication); + } } diff --git a/core/src/main/scala/spark/storage/StorageLevel.scala b/core/src/main/scala/spark/storage/StorageLevel.scala index f2535ae5ae..45d6ea2656 100644 --- a/core/src/main/scala/spark/storage/StorageLevel.scala +++ b/core/src/main/scala/spark/storage/StorageLevel.scala @@ -7,10 +7,10 @@ import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} * whether to drop the RDD to disk if it falls out of memory, whether to keep the data in memory * in a serialized format, and whether to replicate the RDD partitions on multiple nodes. * The [[spark.storage.StorageLevel$]] singleton object contains some static constants for - * commonly useful storage levels. The recommended method to create your own storage level - * object is to use `StorageLevel.apply(...)` from the singleton object. + * commonly useful storage levels. To create your own storage level object, use the factor method + * of the singleton object (`StorageLevel(...)`). 
*/ -class StorageLevel( +class StorageLevel private( private var useDisk_ : Boolean, private var useMemory_ : Boolean, private var deserialized_ : Boolean, diff --git a/docs/java-programming-guide.md b/docs/java-programming-guide.md index 188ca4995e..37a906ea1c 100644 --- a/docs/java-programming-guide.md +++ b/docs/java-programming-guide.md @@ -75,7 +75,8 @@ class has a single abstract method, `call()`, that must be implemented. ## Storage Levels RDD [storage level](scala-programming-guide.html#rdd-persistence) constants, such as `MEMORY_AND_DISK`, are -declared in the [spark.api.java.StorageLevels](api/core/index.html#spark.api.java.StorageLevels) class. +declared in the [spark.api.java.StorageLevels](api/core/index.html#spark.api.java.StorageLevels) class. To +define your own storage level, you can use StorageLevels.create(...). # Other Features diff --git a/docs/scala-programming-guide.md b/docs/scala-programming-guide.md index 7350eca837..301b330a79 100644 --- a/docs/scala-programming-guide.md +++ b/docs/scala-programming-guide.md @@ -301,7 +301,8 @@ We recommend going through the following process to select one: * Use the replicated storage levels if you want fast fault recovery (e.g. if using Spark to serve requests from a web application). *All* the storage levels provide full fault tolerance by recomputing lost data, but the replicated ones let you continue running tasks on the RDD without waiting to recompute a lost partition. - + +If you want to define your own storage level (say, with replication factor of 3 instead of 2), then use the function factor method `apply()` of the [`StorageLevel`](api/core/index.html#spark.storage.StorageLevel$) singleton object. # Shared Variables From 9a27062260490336a3bfa97c6efd39b1e7e81573 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Wed, 23 Jan 2013 01:34:44 -0800 Subject: [PATCH 176/291] Force generation increment after shuffle map stage --- core/src/main/scala/spark/scheduler/DAGScheduler.scala | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index 39a1e6d6c6..d8a9049e81 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -445,9 +445,16 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with logInfo("waiting: " + waiting) logInfo("failed: " + failed) if (stage.shuffleDep != None) { + // We supply true to increment the generation number here in case this is a + // recomputation of the map outputs. In that case, some nodes may have cached + // locations with holes (from when we detected the error) and will need the + // generation incremented to refetch them. + // TODO: Only increment the generation number if this is not the first time + // we registered these map outputs. 
mapOutputTracker.registerMapOutputs( stage.shuffleDep.get.shuffleId, - stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray) + stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray, + true) } updateCacheLocs() if (stage.outputLocs.count(_ == Nil) != 0) { From d209b6b7641059610f734414ea05e0494b5510b0 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Wed, 23 Jan 2013 01:35:14 -0800 Subject: [PATCH 177/291] Extra debugging from hostLost() --- core/src/main/scala/spark/scheduler/DAGScheduler.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index d8a9049e81..740aec2e61 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -528,7 +528,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with val currentGeneration = maybeGeneration.getOrElse(mapOutputTracker.getGeneration) if (!failedGeneration.contains(host) || failedGeneration(host) < currentGeneration) { failedGeneration(host) = currentGeneration - logInfo("Host lost: " + host) + logInfo("Host lost: " + host + " (generation " + currentGeneration + ")") env.blockManager.master.notifyADeadHost(host) // TODO: This will be really slow if we keep accumulating shuffle map stages for ((shuffleId, stage) <- shuffleToMapStage) { @@ -541,6 +541,9 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with } cacheTracker.cacheLost(host) updateCacheLocs() + } else { + logDebug("Additional host lost message for " + host + + "(generation " + currentGeneration + ")") } } From 0b506dd2ecec909cd514143389d0846db2d194ed Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Wed, 23 Jan 2013 01:37:51 -0800 Subject: [PATCH 178/291] Add tests of various node failure scenarios. 
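
The failure tests announced here exercise the generation-based bookkeeping added to the scheduler above: a host loss is acted on only once per generation, and later reports for the same or an older generation are just logged. Reduced to its core, that check looks like the sketch below (hypothetical `HostFailureTracker`, heavily simplified from the DAGScheduler logic).

    import scala.collection.mutable.HashMap

    // Act on the first "host lost" report seen for a given generation; treat
    // repeats carrying the same or an older generation as stale duplicates.
    class HostFailureTracker {
      private val failedGeneration = new HashMap[String, Long]
      private var currentGeneration = 0L

      def incrementGeneration(): Long = { currentGeneration += 1; currentGeneration }

      // Returns true if this report should trigger unregistering map outputs and
      // resubmitting stages; false if it only deserves a debug log line.
      def hostLost(host: String, maybeGeneration: Option[Long] = None): Boolean = {
        val generation = maybeGeneration.getOrElse(currentGeneration)
        if (!failedGeneration.contains(host) || failedGeneration(host) < generation) {
          failedGeneration(host) = generation
          true
        } else {
          false
        }
      }
    }

Bumping the generation after a shuffle-map stage finishes (the extra `true` argument in the hunk above) forces reducers to refetch output locations instead of trusting cached locations that may contain holes from the failure.
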
--- .../test/scala/spark/DistributedSuite.scala | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/core/src/test/scala/spark/DistributedSuite.scala b/core/src/test/scala/spark/DistributedSuite.scala index cacc2796b6..0d6b265e54 100644 --- a/core/src/test/scala/spark/DistributedSuite.scala +++ b/core/src/test/scala/spark/DistributedSuite.scala @@ -188,4 +188,76 @@ class DistributedSuite extends FunSuite with ShouldMatchers with BeforeAndAfter val values = sc.parallelize(1 to 2, 2).map(x => System.getenv("TEST_VAR")).collect() assert(values.toSeq === Seq("TEST_VALUE", "TEST_VALUE")) } + + test("recover from node failures") { + import DistributedSuite.{markNodeIfIdentity, failOnMarkedIdentity} + DistributedSuite.amMaster = true + sc = new SparkContext(clusterUrl, "test") + val data = sc.parallelize(Seq(true, true), 2) + val singleton = sc.parallelize(Seq(true), 1) + assert(data.count === 2) // force executors to start + val masterId = SparkEnv.get.blockManager.blockManagerId + assert(data.map(markNodeIfIdentity).collect.size === 2) + assert(data.map(failOnMarkedIdentity).collect.size === 2) + } + + test("recover from repeated node failures during shuffle-map") { + import DistributedSuite.{markNodeIfIdentity, failOnMarkedIdentity} + DistributedSuite.amMaster = true + sc = new SparkContext(clusterUrl, "test") + for (i <- 1 to 3) { + val data = sc.parallelize(Seq(true, false), 2) + val singleton = sc.parallelize(Seq(false), 1) + assert(data.count === 2) + assert(data.map(markNodeIfIdentity).collect.size === 2) + assert(data.map(failOnMarkedIdentity).map(x => x -> x).groupByKey.count === 2) + } + } + + test("recover from repeated node failures during shuffle-reduce") { + import DistributedSuite.{markNodeIfIdentity, failOnMarkedIdentity} + DistributedSuite.amMaster = true + sc = new SparkContext(clusterUrl, "test") + for (i <- 1 to 3) { + val data = sc.parallelize(Seq(true, true), 2) + val singleton = sc.parallelize(Seq(false), 1) + assert(data.count === 2) + assert(data.map(markNodeIfIdentity).collect.size === 2) + // This relies on mergeCombiners being used to perform the actual reduce for this + // test to actually be testing what it claims. + val grouped = data.map(x => x -> x).combineByKey( + x => x, + (x: Boolean, y: Boolean) => x, + (x: Boolean, y: Boolean) => failOnMarkedIdentity(x) + ) + assert(grouped.collect.size === 1) + } + } +} + +object DistributedSuite { + // Indicates whether this JVM is marked for failure. + var mark = false + + // Set by test to remember if we are in the driver program so we can assert + // that we are not. + var amMaster = false + + // Act like an identity function, but if the argument is true, set mark to true. + def markNodeIfIdentity(item: Boolean): Boolean = { + if (item) { + assert(!amMaster) + mark = true + } + item + } + + // Act like an identity function, but if mark was set to true previously, fail, + // crashing the entire JVM. + def failOnMarkedIdentity(item: Boolean): Boolean = { + if (mark) { + System.exit(42) + } + item + } } From 79d55700ce2559051ac61cc2fb72a67fd7035926 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Wed, 23 Jan 2013 01:57:09 -0800 Subject: [PATCH 179/291] One more fix. Made even default constructor of BlockManagerId private to prevent such problems in the future. 
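
The remaining hole this fix closes is deserialization: if the no-argument constructor stays accessible, readers can build ids that bypass the cache. Making it private and funneling ObjectInput through the factory keeps every instance canonical. A compact sketch of that shape, extending the earlier interning example with a hypothetical `Endpoint` class (not the Spark source):

    import java.io.{Externalizable, ObjectInput, ObjectOutput}
    import java.util.concurrent.ConcurrentHashMap

    // Even the no-arg constructor used for deserialization is private, so the
    // only public entry points are factory methods that consult the cache.
    class Endpoint private (private var host_ : String, private var port_ : Int)
      extends Externalizable {

      private def this() = this(null, 0)   // for deserialization only

      def host = host_
      def port = port_

      override def writeExternal(out: ObjectOutput) {
        out.writeUTF(host_)
        out.writeInt(port_)
      }

      override def readExternal(in: ObjectInput) {
        host_ = in.readUTF()
        port_ = in.readInt()
      }

      override def equals(other: Any): Boolean = other match {
        case that: Endpoint => that.host == host && that.port == port
        case _ => false
      }
      override def hashCode: Int = host.hashCode * 41 + port
    }

    object Endpoint {
      private val cache = new ConcurrentHashMap[Endpoint, Endpoint]()

      private def cached(e: Endpoint): Endpoint = {
        val existing = cache.putIfAbsent(e, e)
        if (existing == null) e else existing
      }

      def apply(host: String, port: Int): Endpoint = cached(new Endpoint(host, port))

      // Deserialize via the factory so the result is the canonical cached copy.
      def apply(in: ObjectInput): Endpoint = {
        val obj = new Endpoint()
        obj.readExternal(in)
        cached(obj)
      }
    }
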
--- .../src/main/scala/spark/storage/BlockManagerId.scala | 11 ++++++----- .../scala/spark/storage/BlockManagerMessages.scala | 3 +-- core/src/main/scala/spark/storage/StorageLevel.scala | 7 +++++++ 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/spark/storage/BlockManagerId.scala b/core/src/main/scala/spark/storage/BlockManagerId.scala index 26c98f2ac8..abb8b45a1f 100644 --- a/core/src/main/scala/spark/storage/BlockManagerId.scala +++ b/core/src/main/scala/spark/storage/BlockManagerId.scala @@ -16,9 +16,7 @@ private[spark] class BlockManagerId private ( private var port_ : Int ) extends Externalizable { - private def this(in: ObjectInput) = this(in.readUTF(), in.readInt()) - - def this() = this(null, 0) // For deserialization only + private def this() = this(null, 0) // For deserialization only def ip = ip_ @@ -53,8 +51,11 @@ private[spark] object BlockManagerId { def apply(ip: String, port: Int) = getCachedBlockManagerId(new BlockManagerId(ip, port)) - def apply(in: ObjectInput) = - getCachedBlockManagerId(new BlockManagerId(in)) + def apply(in: ObjectInput) = { + val obj = new BlockManagerId() + obj.readExternal(in) + getCachedBlockManagerId(obj) + } val blockManagerIdCache = new ConcurrentHashMap[BlockManagerId, BlockManagerId]() diff --git a/core/src/main/scala/spark/storage/BlockManagerMessages.scala b/core/src/main/scala/spark/storage/BlockManagerMessages.scala index 7437fc63eb..30483b0b37 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMessages.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMessages.scala @@ -56,8 +56,7 @@ class UpdateBlockInfo( override def readExternal(in: ObjectInput) { blockManagerId = BlockManagerId(in) blockId = in.readUTF() - storageLevel = new StorageLevel() - storageLevel.readExternal(in) + storageLevel = StorageLevel(in) memSize = in.readInt() diskSize = in.readInt() } diff --git a/core/src/main/scala/spark/storage/StorageLevel.scala b/core/src/main/scala/spark/storage/StorageLevel.scala index 45d6ea2656..d1d1c61c1c 100644 --- a/core/src/main/scala/spark/storage/StorageLevel.scala +++ b/core/src/main/scala/spark/storage/StorageLevel.scala @@ -104,6 +104,13 @@ object StorageLevel { def apply(flags: Int, replication: Int) = getCachedStorageLevel(new StorageLevel(flags, replication)) + /** Read StorageLevel object from ObjectInput stream */ + def apply(in: ObjectInput) = { + val obj = new StorageLevel() + obj.readExternal(in) + getCachedStorageLevel(obj) + } + private[spark] val storageLevelCache = new java.util.concurrent.ConcurrentHashMap[StorageLevel, StorageLevel]() From 666ce431aa03239d580a8c78b3a2f34a851eb413 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Wed, 23 Jan 2013 03:15:36 -0800 Subject: [PATCH 180/291] Added support for rescheduling unprocessed batches on master failure. 
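
Recovery here rests on remembering which batch times still had unfinished jobs when the driver died: the job manager records jobs per batch time, the checkpoint saves those pending times, and on restart the scheduler regenerates the work for each of them before restarting the timer. A minimal sketch of that bookkeeping, with a hypothetical `PendingBatchTracker` and jobs represented as plain strings:

    import scala.collection.mutable.{ArrayBuffer, HashMap}

    // Track which batch times still have unfinished jobs. The set of pending
    // times is what a checkpoint would persist; on recovery each pending time
    // is regenerated before normal scheduling resumes.
    class PendingBatchTracker {
      private val jobs = new HashMap[Long, ArrayBuffer[String]]

      def jobStarted(time: Long, job: String) {
        jobs.synchronized {
          jobs.getOrElseUpdate(time, new ArrayBuffer[String]) += job
        }
      }

      def jobFinished(time: Long, job: String) {
        jobs.synchronized {
          jobs.get(time) match {
            case Some(remaining) =>
              remaining -= job
              if (remaining.isEmpty) jobs -= time   // batch fully processed
            case None => // nothing recorded for this time
          }
        }
      }

      // Batch times that were received but not completely processed yet.
      def pendingTimes: Array[Long] = jobs.synchronized { jobs.keySet.toArray }
    }

Combined with a replayable input source, re-running the pending times means an interrupted batch is processed again rather than silently dropped, which is why the updated tests below expect the last pre-failure batch to show up twice after recovery.
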
--- .../scala/spark/streaming/Checkpoint.scala | 3 +- .../scala/spark/streaming/JobManager.scala | 30 ++++++++++++++++++- .../scala/spark/streaming/Scheduler.scala | 5 +++- .../spark/streaming/StreamingContext.scala | 4 +-- .../spark/streaming/InputStreamsSuite.scala | 23 +++++++++----- 5 files changed, 53 insertions(+), 12 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/spark/streaming/Checkpoint.scala index 2f3adb39c2..b9eb7f8ec4 100644 --- a/streaming/src/main/scala/spark/streaming/Checkpoint.scala +++ b/streaming/src/main/scala/spark/streaming/Checkpoint.scala @@ -17,7 +17,8 @@ class Checkpoint(@transient ssc: StreamingContext, val checkpointTime: Time) val jars = ssc.sc.jars val graph = ssc.graph val checkpointDir = ssc.checkpointDir - val checkpointDuration: Duration = ssc.checkpointDuration + val checkpointDuration = ssc.checkpointDuration + val pendingTimes = ssc.scheduler.jobManager.getPendingTimes() def validate() { assert(master != null, "Checkpoint.master is null") diff --git a/streaming/src/main/scala/spark/streaming/JobManager.scala b/streaming/src/main/scala/spark/streaming/JobManager.scala index 3b910538e0..5acdd01e58 100644 --- a/streaming/src/main/scala/spark/streaming/JobManager.scala +++ b/streaming/src/main/scala/spark/streaming/JobManager.scala @@ -3,6 +3,8 @@ package spark.streaming import spark.Logging import spark.SparkEnv import java.util.concurrent.Executors +import collection.mutable.HashMap +import collection.mutable.ArrayBuffer private[streaming] @@ -19,15 +21,41 @@ class JobManager(ssc: StreamingContext, numThreads: Int = 1) extends Logging { case e: Exception => logError("Running " + job + " failed", e) } + clearJob(job) } } initLogging() val jobExecutor = Executors.newFixedThreadPool(numThreads) - + val jobs = new HashMap[Time, ArrayBuffer[Job]] + def runJob(job: Job) { + jobs.synchronized { + jobs.getOrElseUpdate(job.time, new ArrayBuffer[Job]) += job + } jobExecutor.execute(new JobHandler(ssc, job)) logInfo("Added " + job + " to queue") } + + private def clearJob(job: Job) { + jobs.synchronized { + val jobsOfTime = jobs.get(job.time) + if (jobsOfTime.isDefined) { + jobsOfTime.get -= job + if (jobsOfTime.get.isEmpty) { + jobs -= job.time + } + } else { + throw new Exception("Job finished for time " + job.time + + " but time does not exist in jobs") + } + } + } + + def getPendingTimes(): Array[Time] = { + jobs.synchronized { + jobs.keySet.toArray + } + } } diff --git a/streaming/src/main/scala/spark/streaming/Scheduler.scala b/streaming/src/main/scala/spark/streaming/Scheduler.scala index c04ed37de8..b77986a3ba 100644 --- a/streaming/src/main/scala/spark/streaming/Scheduler.scala +++ b/streaming/src/main/scala/spark/streaming/Scheduler.scala @@ -35,10 +35,13 @@ class Scheduler(ssc: StreamingContext) extends Logging { // either set the manual clock to the last checkpointed time, // or if the property is defined set it to that time if (clock.isInstanceOf[ManualClock]) { - val lastTime = ssc.getInitialCheckpoint.checkpointTime.milliseconds + val lastTime = ssc.initialCheckpoint.checkpointTime.milliseconds val jumpTime = System.getProperty("spark.streaming.manualClock.jump", "0").toLong clock.asInstanceOf[ManualClock].setTime(lastTime + jumpTime) } + // Reschedule the batches that were received but not processed before failure + ssc.initialCheckpoint.pendingTimes.foreach(time => generateRDDs(time)) + // Restart the timer timer.restart(graph.zeroTime.milliseconds) logInfo("Scheduler's timer 
restarted") } else { diff --git a/streaming/src/main/scala/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/spark/streaming/StreamingContext.scala index 2cf00e3baa..5781b1cc72 100644 --- a/streaming/src/main/scala/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/spark/streaming/StreamingContext.scala @@ -133,7 +133,7 @@ class StreamingContext private ( } } - protected[streaming] def getInitialCheckpoint(): Checkpoint = { + protected[streaming] def initialCheckpoint: Checkpoint = { if (isCheckpointPresent) cp_ else null } @@ -367,7 +367,7 @@ class StreamingContext private ( } /** - * Sstops the execution of the streams. + * Stops the execution of the streams. */ def stop() { try { diff --git a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala index 4f6204f205..34e51e9562 100644 --- a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala @@ -44,7 +44,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown System.clearProperty("spark.master.port") } - + /* test("network input stream") { // Start the server testServer = new TestServer(testPort) @@ -236,8 +236,8 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { assert(output(i).head.toString === expectedOutput(i)) } } - - test("file input stream with checkpoint") { + */ + test("file input stream with master failure") { // Create a temporary directory testDir = { var temp = File.createTempFile(".temp.", Random.nextInt().toString) @@ -251,11 +251,17 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { var ssc = new StreamingContext(master, framework, batchDuration) ssc.checkpoint(checkpointDir, checkpointInterval) val fileStream = ssc.textFileStream(testDir.toString) - val outputBuffer = new ArrayBuffer[Seq[Int]] - // Reduced over a large window to ensure that recovery from master failure + // Making value 3 take large time to process, to ensure that the master + // shuts down in the middle of processing the 3rd batch + val mappedStream = fileStream.map(s => { + val i = s.toInt + if (i == 3) Thread.sleep(1000) + i + }) + // Reducing over a large window to ensure that recovery from master failure // requires reprocessing of all the files seen before the failure - val reducedStream = fileStream.map(_.toInt) - .reduceByWindow(_ + _, batchDuration * 30, batchDuration) + val reducedStream = mappedStream.reduceByWindow(_ + _, batchDuration * 30, batchDuration) + val outputBuffer = new ArrayBuffer[Seq[Int]] var outputStream = new TestOutputStream(reducedStream, outputBuffer) ssc.registerOutputStream(outputStream) ssc.start() @@ -275,6 +281,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { assert(outputStream.output.size > 0, "No files processed before restart") ssc.stop() + // Create files while the master is down for (i <- Seq(4, 5, 6)) { FileUtils.writeStringToFile(new File(testDir, i.toString), i.toString + "\n") Thread.sleep(1000) @@ -293,6 +300,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { Thread.sleep(500) } Thread.sleep(1000) + logInfo("Output = " + outputStream.output.mkString(",")) assert(outputStream.output.size > 0, "No files processed after restart") ssc.stop() @@ -316,6 +324,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { 
assert(outputBuffer(i).head === expectedOutput(i)) } } + } From 9c8ff1e55fb97980e7f0bb7f305c1ed0e59b749e Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Wed, 23 Jan 2013 07:31:49 -0800 Subject: [PATCH 181/291] Fixed checkpoint testcases --- streaming/src/test/java/JavaAPISuite.java | 23 +-- .../spark/streaming/CheckpointSuite.scala | 115 +++++++++++- .../spark/streaming/InputStreamsSuite.scala | 163 +----------------- 3 files changed, 129 insertions(+), 172 deletions(-) diff --git a/streaming/src/test/java/JavaAPISuite.java b/streaming/src/test/java/JavaAPISuite.java index c84e7331c7..7a189d85b4 100644 --- a/streaming/src/test/java/JavaAPISuite.java +++ b/streaming/src/test/java/JavaAPISuite.java @@ -45,7 +45,7 @@ public class JavaAPISuite implements Serializable { // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown System.clearProperty("spark.master.port"); } - + /* @Test public void testCount() { List> inputData = Arrays.asList( @@ -434,7 +434,7 @@ public class JavaAPISuite implements Serializable { assertOrderInvariantEquals(expected, result); } - + */ /* * Performs an order-invariant comparison of lists representing two RDD streams. This allows * us to account for ordering variation within individual RDD's which occurs during windowing. @@ -450,7 +450,7 @@ public class JavaAPISuite implements Serializable { Assert.assertEquals(expected, actual); } - + /* // PairDStream Functions @Test public void testPairFilter() { @@ -897,7 +897,7 @@ public class JavaAPISuite implements Serializable { Assert.assertEquals(expected, result); } - + */ @Test public void testCheckpointMasterRecovery() throws InterruptedException { List> inputData = Arrays.asList( @@ -911,7 +911,6 @@ public class JavaAPISuite implements Serializable { Arrays.asList(1,4), Arrays.asList(8,7)); - File tempDir = Files.createTempDir(); ssc.checkpoint(tempDir.getAbsolutePath(), new Duration(1000)); @@ -927,14 +926,16 @@ public class JavaAPISuite implements Serializable { assertOrderInvariantEquals(expectedInitial, initialResult); Thread.sleep(1000); - ssc.stop(); + ssc = new JavaStreamingContext(tempDir.getAbsolutePath()); - ssc.start(); - List> finalResult = JavaCheckpointTestUtils.runStreams(ssc, 2, 2); - assertOrderInvariantEquals(expectedFinal, finalResult); + // Tweak to take into consideration that the last batch before failure + // will be re-processed after recovery + List> finalResult = JavaCheckpointTestUtils.runStreams(ssc, 2, 3); + assertOrderInvariantEquals(expectedFinal, finalResult.subList(1, 3)); } + /** TEST DISABLED: Pending a discussion about checkpoint() semantics with TD @Test public void testCheckpointofIndividualStream() throws InterruptedException { @@ -963,7 +964,7 @@ public class JavaAPISuite implements Serializable { assertOrderInvariantEquals(expected, result1); } */ - + /* // Input stream tests. These mostly just test that we can instantiate a given InputStream with // Java arguments and assign it to a JavaDStream without producing type errors. Testing of the // InputStream functionality is deferred to the existing Scala tests. 
@@ -1025,5 +1026,5 @@ public class JavaAPISuite implements Serializable { public void testFileStream() { JavaPairDStream foo = ssc.fileStream("/tmp/foo"); - } + }*/ } diff --git a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala index 58da4ee539..04ccca4c01 100644 --- a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala +++ b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala @@ -7,6 +7,8 @@ import org.scalatest.BeforeAndAfter import org.apache.commons.io.FileUtils import collection.mutable.{SynchronizedBuffer, ArrayBuffer} import util.{Clock, ManualClock} +import scala.util.Random +import com.google.common.io.Files class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { @@ -32,7 +34,7 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { override def actuallyWait = true - test("basic stream+rdd recovery") { + test("basic rdd checkpoints + dstream graph checkpoint recovery") { assert(batchDuration === Milliseconds(500), "batchDuration for this test must be 1 second") assert(checkpointInterval === batchDuration, "checkpointInterval for this test much be same as batchDuration") @@ -117,7 +119,10 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { ssc = null } - test("map and reduceByKey") { + // This tests whether the systm can recover from a master failure with simple + // non-stateful operations. This assumes as reliable, replayable input + // source - TestInputDStream. + test("recovery with map and reduceByKey operations") { testCheckpointedOperation( Seq( Seq("a", "a", "b"), Seq("", ""), Seq(), Seq("a", "a", "b"), Seq("", ""), Seq() ), (s: DStream[String]) => s.map(x => (x, 1)).reduceByKey(_ + _), @@ -126,7 +131,11 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { ) } - test("reduceByKeyAndWindowInv") { + + // This tests whether the ReduceWindowedDStream's RDD checkpoints works correctly such + // that the system can recover from a master failure. This assumes as reliable, + // replayable input source - TestInputDStream. + test("recovery with invertible reduceByKeyAndWindow operation") { val n = 10 val w = 4 val input = (1 to n).map(_ => Seq("a")).toSeq @@ -139,7 +148,11 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { testCheckpointedOperation(input, operation, output, 7) } - test("updateStateByKey") { + + // This tests whether the StateDStream's RDD checkpoints works correctly such + // that the system can recover from a master failure. This assumes as reliable, + // replayable input source - TestInputDStream. + test("recovery with updateStateByKey operation") { val input = (1 to 10).map(_ => Seq("a")).toSeq val output = (1 to 10).map(x => Seq(("a", x))).toSeq val operation = (st: DStream[String]) => { @@ -154,11 +167,99 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { testCheckpointedOperation(input, operation, output, 7) } + // This tests whether file input stream remembers what files were seen before + // the master failure and uses them again to process a large window operatoin. + // It also tests whether batches, whose processing was incomplete due to the + // failure, are re-processed or not. 
+ test("recovery with file input stream") { + // Set up the streaming context and input streams + val testDir = Files.createTempDir() + var ssc = new StreamingContext(master, framework, batchDuration) + ssc.checkpoint(checkpointDir, checkpointInterval) + val fileStream = ssc.textFileStream(testDir.toString) + // Making value 3 take large time to process, to ensure that the master + // shuts down in the middle of processing the 3rd batch + val mappedStream = fileStream.map(s => { + val i = s.toInt + if (i == 3) Thread.sleep(1000) + i + }) + // Reducing over a large window to ensure that recovery from master failure + // requires reprocessing of all the files seen before the failure + val reducedStream = mappedStream.reduceByWindow(_ + _, batchDuration * 30, batchDuration) + val outputBuffer = new ArrayBuffer[Seq[Int]] + var outputStream = new TestOutputStream(reducedStream, outputBuffer) + ssc.registerOutputStream(outputStream) + ssc.start() + + // Create files and advance manual clock to process them + var clock = ssc.scheduler.clock.asInstanceOf[ManualClock] + Thread.sleep(1000) + for (i <- Seq(1, 2, 3)) { + FileUtils.writeStringToFile(new File(testDir, i.toString), i.toString + "\n") + // wait to make sure that the file is written such that it gets shown in the file listings + Thread.sleep(500) + clock.addToTime(batchDuration.milliseconds) + // wait to make sure that FileInputDStream picks up this file only and not any other file + Thread.sleep(500) + } + logInfo("Output = " + outputStream.output.mkString(",")) + assert(outputStream.output.size > 0, "No files processed before restart") + ssc.stop() + + // Create files while the master is down + for (i <- Seq(4, 5, 6)) { + FileUtils.writeStringToFile(new File(testDir, i.toString), i.toString + "\n") + Thread.sleep(1000) + } + + // Restart stream computation from checkpoint and create more files to see whether + // they are being processed + logInfo("*********** RESTARTING ************") + ssc = new StreamingContext(checkpointDir) + ssc.start() + clock = ssc.scheduler.clock.asInstanceOf[ManualClock] + for (i <- Seq(7, 8, 9)) { + FileUtils.writeStringToFile(new File(testDir, i.toString), i.toString + "\n") + Thread.sleep(500) + clock.addToTime(batchDuration.milliseconds) + Thread.sleep(500) + } + Thread.sleep(1000) + logInfo("Output = " + outputStream.output.mkString(",")) + assert(outputStream.output.size > 0, "No files processed after restart") + ssc.stop() + + // Append the new output to the old buffer + outputStream = ssc.graph.getOutputStreams().head.asInstanceOf[TestOutputStream[Int]] + outputBuffer ++= outputStream.output + + // Verify whether data received by Spark Streaming was as expected + val expectedOutput = Seq(1, 3, 6, 28, 36, 45) + logInfo("--------------------------------") + logInfo("output, size = " + outputBuffer.size) + outputBuffer.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("expected output, size = " + expectedOutput.size) + expectedOutput.foreach(x => logInfo("[" + x + "]")) + logInfo("--------------------------------") + + // Verify whether all the elements received are as expected + assert(outputBuffer.size === expectedOutput.size) + for (i <- 0 until outputBuffer.size) { + assert(outputBuffer(i).size === 1) + assert(outputBuffer(i).head === expectedOutput(i)) + } + } + + /** - * Tests a streaming operation under checkpointing, by restart the operation + * Tests a streaming operation under checkpointing, by restarting the operation * from checkpoint file and verifying whether the final output is 
correct. * The output is assumed to have come from a reliable queue which an replay * data as required. + * + * NOTE: This takes into consideration that the last batch processed before + * master failure will be re-processed after restart/recovery. */ def testCheckpointedOperation[U: ClassManifest, V: ClassManifest]( input: Seq[Seq[U]], @@ -172,7 +273,8 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { val totalNumBatches = input.size val nextNumBatches = totalNumBatches - initialNumBatches val initialNumExpectedOutputs = initialNumBatches - val nextNumExpectedOutputs = expectedOutput.size - initialNumExpectedOutputs + val nextNumExpectedOutputs = expectedOutput.size - initialNumExpectedOutputs + 1 + // because the last batch will be processed again // Do the computation for initial number of batches, create checkpoint file and quit ssc = setupStreams[U, V](input, operation) @@ -188,6 +290,7 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { ) ssc = new StreamingContext(checkpointDir) val outputNew = runStreams[V](ssc, nextNumBatches, nextNumExpectedOutputs) + // the first element will be re-processed data of the last batch before restart verifyOutput[V](outputNew, expectedOutput.takeRight(nextNumExpectedOutputs), true) ssc = null } diff --git a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala index 34e51e9562..aa08ea1141 100644 --- a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala @@ -19,35 +19,24 @@ import org.apache.avro.ipc.specific.SpecificRequestor import java.nio.ByteBuffer import collection.JavaConversions._ import java.nio.charset.Charset +import com.google.common.io.Files class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock") - val testPort = 9999 - var testServer: TestServer = null - var testDir: File = null - override def checkpointDir = "checkpoint" after { - FileUtils.deleteDirectory(new File(checkpointDir)) - if (testServer != null) { - testServer.stop() - testServer = null - } - if (testDir != null && testDir.exists()) { - FileUtils.deleteDirectory(testDir) - testDir = null - } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown System.clearProperty("spark.master.port") } - /* + + test("network input stream") { // Start the server - testServer = new TestServer(testPort) + val testPort = 9999 + val testServer = new TestServer(testPort) testServer.start() // Set up the streaming context and input streams @@ -93,46 +82,6 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { } } - test("network input stream with checkpoint") { - // Start the server - testServer = new TestServer(testPort) - testServer.start() - - // Set up the streaming context and input streams - var ssc = new StreamingContext(master, framework, batchDuration) - ssc.checkpoint(checkpointDir, checkpointInterval) - val networkStream = ssc.networkTextStream("localhost", testPort, StorageLevel.MEMORY_AND_DISK) - var outputStream = new TestOutputStream(networkStream, new ArrayBuffer[Seq[String]]) - ssc.registerOutputStream(outputStream) - ssc.start() - - // Feed data to the server to send to the network receiver - var clock = ssc.scheduler.clock.asInstanceOf[ManualClock] - for (i <- Seq(1, 2, 3)) { - testServer.send(i.toString + "\n") - Thread.sleep(100) - 
clock.addToTime(batchDuration.milliseconds) - } - Thread.sleep(500) - assert(outputStream.output.size > 0) - ssc.stop() - - // Restart stream computation from checkpoint and feed more data to see whether - // they are being received and processed - logInfo("*********** RESTARTING ************") - ssc = new StreamingContext(checkpointDir) - ssc.start() - clock = ssc.scheduler.clock.asInstanceOf[ManualClock] - for (i <- Seq(4, 5, 6)) { - testServer.send(i.toString + "\n") - Thread.sleep(100) - clock.addToTime(batchDuration.milliseconds) - } - Thread.sleep(500) - outputStream = ssc.graph.getOutputStreams().head.asInstanceOf[TestOutputStream[String]] - assert(outputStream.output.size > 0) - ssc.stop() - } test("flume input stream") { // Set up the streaming context and input streams @@ -182,18 +131,10 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { } } + test("file input stream") { - - // Create a temporary directory - testDir = { - var temp = File.createTempFile(".temp.", Random.nextInt().toString) - temp.delete() - temp.mkdirs() - logInfo("Created temp dir " + temp) - temp - } - // Set up the streaming context and input streams + val testDir = Files.createTempDir() val ssc = new StreamingContext(master, framework, batchDuration) val filestream = ssc.textFileStream(testDir.toString) val outputBuffer = new ArrayBuffer[Seq[String]] with SynchronizedBuffer[Seq[String]] @@ -235,96 +176,8 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { assert(output(i).size === 1) assert(output(i).head.toString === expectedOutput(i)) } + FileUtils.deleteDirectory(testDir) } - */ - test("file input stream with master failure") { - // Create a temporary directory - testDir = { - var temp = File.createTempFile(".temp.", Random.nextInt().toString) - temp.delete() - temp.mkdirs() - logInfo("Created temp dir " + temp) - temp - } - - // Set up the streaming context and input streams - var ssc = new StreamingContext(master, framework, batchDuration) - ssc.checkpoint(checkpointDir, checkpointInterval) - val fileStream = ssc.textFileStream(testDir.toString) - // Making value 3 take large time to process, to ensure that the master - // shuts down in the middle of processing the 3rd batch - val mappedStream = fileStream.map(s => { - val i = s.toInt - if (i == 3) Thread.sleep(1000) - i - }) - // Reducing over a large window to ensure that recovery from master failure - // requires reprocessing of all the files seen before the failure - val reducedStream = mappedStream.reduceByWindow(_ + _, batchDuration * 30, batchDuration) - val outputBuffer = new ArrayBuffer[Seq[Int]] - var outputStream = new TestOutputStream(reducedStream, outputBuffer) - ssc.registerOutputStream(outputStream) - ssc.start() - - // Create files and advance manual clock to process them - var clock = ssc.scheduler.clock.asInstanceOf[ManualClock] - Thread.sleep(1000) - for (i <- Seq(1, 2, 3)) { - FileUtils.writeStringToFile(new File(testDir, i.toString), i.toString + "\n") - // wait to make sure that the file is written such that it gets shown in the file listings - Thread.sleep(500) - clock.addToTime(batchDuration.milliseconds) - // wait to make sure that FileInputDStream picks up this file only and not any other file - Thread.sleep(500) - } - logInfo("Output = " + outputStream.output.mkString(",")) - assert(outputStream.output.size > 0, "No files processed before restart") - ssc.stop() - - // Create files while the master is down - for (i <- Seq(4, 5, 6)) { - FileUtils.writeStringToFile(new File(testDir, 
i.toString), i.toString + "\n") - Thread.sleep(1000) - } - - // Restart stream computation from checkpoint and create more files to see whether - // they are being processed - logInfo("*********** RESTARTING ************") - ssc = new StreamingContext(checkpointDir) - ssc.start() - clock = ssc.scheduler.clock.asInstanceOf[ManualClock] - for (i <- Seq(7, 8, 9)) { - FileUtils.writeStringToFile(new File(testDir, i.toString), i.toString + "\n") - Thread.sleep(500) - clock.addToTime(batchDuration.milliseconds) - Thread.sleep(500) - } - Thread.sleep(1000) - logInfo("Output = " + outputStream.output.mkString(",")) - assert(outputStream.output.size > 0, "No files processed after restart") - ssc.stop() - - // Append the new output to the old buffer - outputStream = ssc.graph.getOutputStreams().head.asInstanceOf[TestOutputStream[Int]] - outputBuffer ++= outputStream.output - - // Verify whether data received by Spark Streaming was as expected - val expectedOutput = Seq(1, 3, 6, 28, 36, 45) - logInfo("--------------------------------") - logInfo("output, size = " + outputBuffer.size) - outputBuffer.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("expected output, size = " + expectedOutput.size) - expectedOutput.foreach(x => logInfo("[" + x + "]")) - logInfo("--------------------------------") - - // Verify whether all the elements received are as expected - assert(outputBuffer.size === expectedOutput.size) - for (i <- 0 until outputBuffer.size) { - assert(outputBuffer(i).size === 1) - assert(outputBuffer(i).head === expectedOutput(i)) - } - } - } From ae2ed2947d43860c74a8d40767e289ca78073977 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Wed, 23 Jan 2013 10:36:18 -0800 Subject: [PATCH 182/291] Allow PySpark's SparkFiles to be used from driver Fix minor documentation formatting issues. --- core/src/main/scala/spark/SparkFiles.java | 8 +++---- python/pyspark/context.py | 27 ++++++++++++++++++----- python/pyspark/files.py | 20 ++++++++++++++--- python/pyspark/tests.py | 23 +++++++++++++++++++ python/pyspark/worker.py | 1 + python/test_support/hello.txt | 1 + 6 files changed, 67 insertions(+), 13 deletions(-) create mode 100755 python/test_support/hello.txt diff --git a/core/src/main/scala/spark/SparkFiles.java b/core/src/main/scala/spark/SparkFiles.java index b59d8ce93f..566aec622c 100644 --- a/core/src/main/scala/spark/SparkFiles.java +++ b/core/src/main/scala/spark/SparkFiles.java @@ -3,23 +3,23 @@ package spark; import java.io.File; /** - * Resolves paths to files added through `addFile(). + * Resolves paths to files added through `SparkContext.addFile()`. */ public class SparkFiles { private SparkFiles() {} /** - * Get the absolute path of a file added through `addFile()`. + * Get the absolute path of a file added through `SparkContext.addFile()`. */ public static String get(String filename) { return new File(getRootDirectory(), filename).getAbsolutePath(); } /** - * Get the root directory that contains files added through `addFile()`. + * Get the root directory that contains files added through `SparkContext.addFile()`. 
*/ public static String getRootDirectory() { return SparkEnv.get().sparkFilesDir(); } -} \ No newline at end of file +} diff --git a/python/pyspark/context.py b/python/pyspark/context.py index b8d7dc05af..3e33776af0 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -1,12 +1,15 @@ import os import atexit import shutil +import sys import tempfile +from threading import Lock from tempfile import NamedTemporaryFile from pyspark import accumulators from pyspark.accumulators import Accumulator from pyspark.broadcast import Broadcast +from pyspark.files import SparkFiles from pyspark.java_gateway import launch_gateway from pyspark.serializers import dump_pickle, write_with_length, batched from pyspark.rdd import RDD @@ -27,6 +30,8 @@ class SparkContext(object): _writeIteratorToPickleFile = jvm.PythonRDD.writeIteratorToPickleFile _takePartition = jvm.PythonRDD.takePartition _next_accum_id = 0 + _active_spark_context = None + _lock = Lock() def __init__(self, master, jobName, sparkHome=None, pyFiles=None, environment=None, batchSize=1024): @@ -46,6 +51,11 @@ class SparkContext(object): Java object. Set 1 to disable batching or -1 to use an unlimited batch size. """ + with SparkContext._lock: + if SparkContext._active_spark_context: + raise ValueError("Cannot run multiple SparkContexts at once") + else: + SparkContext._active_spark_context = self self.master = master self.jobName = jobName self.sparkHome = sparkHome or None # None becomes null in Py4J @@ -75,6 +85,8 @@ class SparkContext(object): # Deploy any code dependencies specified in the constructor for path in (pyFiles or []): self.addPyFile(path) + SparkFiles._sc = self + sys.path.append(SparkFiles.getRootDirectory()) @property def defaultParallelism(self): @@ -85,17 +97,20 @@ class SparkContext(object): return self._jsc.sc().defaultParallelism() def __del__(self): - if self._jsc: - self._jsc.stop() - if self._accumulatorServer: - self._accumulatorServer.shutdown() + self.stop() def stop(self): """ Shut down the SparkContext. """ - self._jsc.stop() - self._jsc = None + if self._jsc: + self._jsc.stop() + self._jsc = None + if self._accumulatorServer: + self._accumulatorServer.shutdown() + self._accumulatorServer = None + with SparkContext._lock: + SparkContext._active_spark_context = None def parallelize(self, c, numSlices=None): """ diff --git a/python/pyspark/files.py b/python/pyspark/files.py index de1334f046..98f6a399cc 100644 --- a/python/pyspark/files.py +++ b/python/pyspark/files.py @@ -4,13 +4,15 @@ import os class SparkFiles(object): """ Resolves paths to files added through - L{addFile()}. + L{SparkContext.addFile()}. SparkFiles contains only classmethods; users should not create SparkFiles instances. """ _root_directory = None + _is_running_on_worker = False + _sc = None def __init__(self): raise NotImplementedError("Do not construct SparkFiles objects") @@ -18,7 +20,19 @@ class SparkFiles(object): @classmethod def get(cls, filename): """ - Get the absolute path of a file added through C{addFile()}. + Get the absolute path of a file added through C{SparkContext.addFile()}. """ - path = os.path.join(SparkFiles._root_directory, filename) + path = os.path.join(SparkFiles.getRootDirectory(), filename) return os.path.abspath(path) + + @classmethod + def getRootDirectory(cls): + """ + Get the root directory that contains files added through + C{SparkContext.addFile()}. 
+ """ + if cls._is_running_on_worker: + return cls._root_directory + else: + # This will have to change if we support multiple SparkContexts: + return cls._sc.jvm.spark.SparkFiles.getRootDirectory() diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 4d70ee4f12..46ab34f063 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -4,22 +4,26 @@ individual modules. """ import os import shutil +import sys from tempfile import NamedTemporaryFile import time import unittest from pyspark.context import SparkContext +from pyspark.files import SparkFiles from pyspark.java_gateway import SPARK_HOME class PySparkTestCase(unittest.TestCase): def setUp(self): + self._old_sys_path = list(sys.path) class_name = self.__class__.__name__ self.sc = SparkContext('local[4]', class_name , batchSize=2) def tearDown(self): self.sc.stop() + sys.path = self._old_sys_path # To avoid Akka rebinding to the same port, since it doesn't unbind # immediately on shutdown self.sc.jvm.System.clearProperty("spark.master.port") @@ -84,6 +88,25 @@ class TestAddFile(PySparkTestCase): res = self.sc.parallelize(range(2)).map(func).first() self.assertEqual("Hello World!", res) + def test_add_file_locally(self): + path = os.path.join(SPARK_HOME, "python/test_support/hello.txt") + self.sc.addFile(path) + download_path = SparkFiles.get("hello.txt") + self.assertNotEqual(path, download_path) + with open(download_path) as test_file: + self.assertEquals("Hello World!\n", test_file.readline()) + + def test_add_py_file_locally(self): + # To ensure that we're actually testing addPyFile's effects, check that + # this fails due to `userlibrary` not being on the Python path: + def func(): + from userlibrary import UserClass + self.assertRaises(ImportError, func) + path = os.path.join(SPARK_HOME, "python/test_support/userlibrary.py") + self.sc.addFile(path) + from userlibrary import UserClass + self.assertEqual("Hello World!", UserClass().hello()) + if __name__ == "__main__": unittest.main() diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 4bf643da66..d33d6dd15f 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -26,6 +26,7 @@ def main(): split_index = read_int(sys.stdin) spark_files_dir = load_pickle(read_with_length(sys.stdin)) SparkFiles._root_directory = spark_files_dir + SparkFiles._is_running_on_worker = True sys.path.append(spark_files_dir) num_broadcast_variables = read_int(sys.stdin) for _ in range(num_broadcast_variables): diff --git a/python/test_support/hello.txt b/python/test_support/hello.txt new file mode 100755 index 0000000000..980a0d5f19 --- /dev/null +++ b/python/test_support/hello.txt @@ -0,0 +1 @@ +Hello World! From b47d054cfc5ef45b92a1c970388722ffa0283e66 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Wed, 23 Jan 2013 11:18:25 -0800 Subject: [PATCH 183/291] Remove use of abc.ABCMeta due to cloudpickle issue. cloudpickle runs into issues while pickling subclasses of AccumulatorParam, which may be related to this Python issue: http://bugs.python.org/issue7689 This seems hard to fix and the ABCMeta wasn't necessary, so I removed it. 
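For reference, a minimal sketch of a custom accumulator type under the plain-class AccumulatorParam; it mirrors the VectorAccumulatorParam doctest in the diff below and is illustrative only, with the commented usage assuming an active SparkContext `sc`:

    from pyspark.accumulators import AccumulatorParam

    class VectorAccumulatorParam(AccumulatorParam):
        """Accumulates equal-length lists of floats element-wise."""
        def zero(self, value):
            # A zero vector with the same dimensions as the supplied value.
            return [0.0] * len(value)

        def addInPlace(self, val1, val2):
            # Update val1 in place and return it, as the contract allows.
            for i in range(len(val1)):
                val1[i] += val2[i]
            return val1

    # Usage, assuming an active SparkContext `sc`:
    # va = sc.accumulator([1.0, 2.0, 3.0], VectorAccumulatorParam())
    # va += [1.0, 1.0, 1.0]

Since the methods now raise NotImplementedError instead of being abstract, forgetting to override zero or addInPlace fails at call time rather than at class definition time.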
--- python/pyspark/accumulators.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py index 5a9269f9bb..61fcbbd376 100644 --- a/python/pyspark/accumulators.py +++ b/python/pyspark/accumulators.py @@ -25,7 +25,8 @@ >>> a.value 13 ->>> class VectorAccumulatorParam(object): +>>> from pyspark.accumulators import AccumulatorParam +>>> class VectorAccumulatorParam(AccumulatorParam): ... def zero(self, value): ... return [0.0] * len(value) ... def addInPlace(self, val1, val2): @@ -61,7 +62,6 @@ Traceback (most recent call last): Exception:... """ -from abc import ABCMeta, abstractmethod import struct import SocketServer import threading @@ -138,23 +138,20 @@ class AccumulatorParam(object): """ Helper object that defines how to accumulate values of a given type. """ - __metaclass__ = ABCMeta - @abstractmethod def zero(self, value): """ Provide a "zero value" for the type, compatible in dimensions with the provided C{value} (e.g., a zero vector) """ - return + raise NotImplementedError - @abstractmethod def addInPlace(self, value1, value2): """ Add two values of the accumulator's data type, returning a new value; for efficiency, can also update C{value1} in place and return it. """ - return + raise NotImplementedError class AddingAccumulatorParam(AccumulatorParam): From e1027ca6398fd5b1a99a2203df840911c4dccb27 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Wed, 23 Jan 2013 12:22:11 -0800 Subject: [PATCH 184/291] Actually add CacheManager. --- core/src/main/scala/spark/CacheManager.scala | 65 ++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 core/src/main/scala/spark/CacheManager.scala diff --git a/core/src/main/scala/spark/CacheManager.scala b/core/src/main/scala/spark/CacheManager.scala new file mode 100644 index 0000000000..a0b53fd9d6 --- /dev/null +++ b/core/src/main/scala/spark/CacheManager.scala @@ -0,0 +1,65 @@ +package spark + +import scala.collection.mutable.{ArrayBuffer, HashSet} +import spark.storage.{BlockManager, StorageLevel} + + +/** Spark class responsible for passing RDDs split contents to the BlockManager and making + sure a node doesn't load two copies of an RDD at once. + */ +private[spark] class CacheManager(blockManager: BlockManager) extends Logging { + private val loading = new HashSet[String] + + /** Gets or computes an RDD split. Used by RDD.iterator() when a RDD is cached. */ + def getOrCompute[T](rdd: RDD[T], split: Split, context: TaskContext, storageLevel: StorageLevel) + : Iterator[T] = { + val key = "rdd_%d_%d".format(rdd.id, split.index) + logInfo("Cache key is " + key) + blockManager.get(key) match { + case Some(cachedValues) => + // Split is in cache, so just return its values + logInfo("Found partition in cache!") + return cachedValues.asInstanceOf[Iterator[T]] + + case None => + // Mark the split as loading (unless someone else marks it first) + loading.synchronized { + if (loading.contains(key)) { + logInfo("Loading contains " + key + ", waiting...") + while (loading.contains(key)) { + try {loading.wait()} catch {case _ =>} + } + logInfo("Loading no longer contains " + key + ", so returning cached result") + // See whether someone else has successfully loaded it. The main way this would fail + // is for the RDD-level cache eviction policy if someone else has loaded the same RDD + // partition but we didn't want to make space for it. However, that case is unlikely + // because it's unlikely that two threads would work on the same RDD partition. 
One + // downside of the current code is that threads wait serially if this does happen. + blockManager.get(key) match { + case Some(values) => + return values.asInstanceOf[Iterator[T]] + case None => + logInfo("Whoever was loading " + key + " failed; we'll try it ourselves") + loading.add(key) + } + } else { + loading.add(key) + } + } + try { + // If we got here, we have to load the split + val elements = new ArrayBuffer[Any] + logInfo("Computing partition " + split) + elements ++= rdd.compute(split, context) + // Try to put this block in the blockManager + blockManager.put(key, elements, storageLevel, true) + return elements.iterator.asInstanceOf[Iterator[T]] + } finally { + loading.synchronized { + loading.remove(key) + loading.notifyAll() + } + } + } + } +} From 88b9d240fda7ca34c08752dfa66797eecb6db872 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Wed, 23 Jan 2013 12:40:38 -0800 Subject: [PATCH 185/291] Remove dead code in test. --- core/src/test/scala/spark/DistributedSuite.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/test/scala/spark/DistributedSuite.scala b/core/src/test/scala/spark/DistributedSuite.scala index 0d6b265e54..af66d33aa3 100644 --- a/core/src/test/scala/spark/DistributedSuite.scala +++ b/core/src/test/scala/spark/DistributedSuite.scala @@ -194,7 +194,6 @@ class DistributedSuite extends FunSuite with ShouldMatchers with BeforeAndAfter DistributedSuite.amMaster = true sc = new SparkContext(clusterUrl, "test") val data = sc.parallelize(Seq(true, true), 2) - val singleton = sc.parallelize(Seq(true), 1) assert(data.count === 2) // force executors to start val masterId = SparkEnv.get.blockManager.blockManagerId assert(data.map(markNodeIfIdentity).collect.size === 2) @@ -207,7 +206,6 @@ class DistributedSuite extends FunSuite with ShouldMatchers with BeforeAndAfter sc = new SparkContext(clusterUrl, "test") for (i <- 1 to 3) { val data = sc.parallelize(Seq(true, false), 2) - val singleton = sc.parallelize(Seq(false), 1) assert(data.count === 2) assert(data.map(markNodeIfIdentity).collect.size === 2) assert(data.map(failOnMarkedIdentity).map(x => x -> x).groupByKey.count === 2) From be4a115a7ec7fb6ec0d34f1a1a1bb2c9bbe7600e Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Wed, 23 Jan 2013 12:48:45 -0800 Subject: [PATCH 186/291] Clarify TODO. --- core/src/main/scala/spark/scheduler/DAGScheduler.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index 740aec2e61..14a3ef8ad7 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -76,7 +76,8 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with // sent with every task. When we detect a node failing, we note the current generation number // and failed host, increment it for new tasks, and use this to ignore stray ShuffleMapTask // results. - // TODO: Garbage collect information about failure generations when new stages start. + // TODO: Garbage collect information about failure generations when we know there are no more + // stray messages to detect. 
val failedGeneration = new HashMap[String, Long] val waiting = new HashSet[Stage] // Stages we need to run whose parents aren't done From e1985bfa04ad4583ac1f0f421cbe0182ce7c53df Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Mon, 21 Jan 2013 16:21:14 -0800 Subject: [PATCH 187/291] be sure to set class loader of kryo instances --- core/src/main/scala/spark/KryoSerializer.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/KryoSerializer.scala b/core/src/main/scala/spark/KryoSerializer.scala index 93d7327324..56919544e8 100644 --- a/core/src/main/scala/spark/KryoSerializer.scala +++ b/core/src/main/scala/spark/KryoSerializer.scala @@ -206,5 +206,8 @@ class KryoSerializer extends spark.serializer.Serializer with Logging { kryo } - def newInstance(): SerializerInstance = new KryoSerializerInstance(this) + def newInstance(): SerializerInstance = { + this.kryo.setClassLoader(Thread.currentThread().getContextClassLoader) + new KryoSerializerInstance(this) + } } From 5c7422292ecace947f78e5ebe97e83a355531af7 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Wed, 23 Jan 2013 12:59:51 -0800 Subject: [PATCH 188/291] Remove more dead code from test. --- core/src/test/scala/spark/DistributedSuite.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/test/scala/spark/DistributedSuite.scala b/core/src/test/scala/spark/DistributedSuite.scala index af66d33aa3..0487e06d12 100644 --- a/core/src/test/scala/spark/DistributedSuite.scala +++ b/core/src/test/scala/spark/DistributedSuite.scala @@ -218,7 +218,6 @@ class DistributedSuite extends FunSuite with ShouldMatchers with BeforeAndAfter sc = new SparkContext(clusterUrl, "test") for (i <- 1 to 3) { val data = sc.parallelize(Seq(true, true), 2) - val singleton = sc.parallelize(Seq(false), 1) assert(data.count === 2) assert(data.map(markNodeIfIdentity).collect.size === 2) // This relies on mergeCombiners being used to perform the actual reduce for this From 1dd82743e09789f8fdae2f5628545c0cb9f79245 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Wed, 23 Jan 2013 13:07:27 -0800 Subject: [PATCH 189/291] Fix compile error due to cherry-pick --- core/src/main/scala/spark/KryoSerializer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/KryoSerializer.scala b/core/src/main/scala/spark/KryoSerializer.scala index 56919544e8..0bd73e936b 100644 --- a/core/src/main/scala/spark/KryoSerializer.scala +++ b/core/src/main/scala/spark/KryoSerializer.scala @@ -207,7 +207,7 @@ class KryoSerializer extends spark.serializer.Serializer with Logging { } def newInstance(): SerializerInstance = { - this.kryo.setClassLoader(Thread.currentThread().getContextClassLoader) + this.kryo.get().setClassLoader(Thread.currentThread().getContextClassLoader) new KryoSerializerInstance(this) } } From eb222b720647c9e92a867c591cc4914b9a6cb5c1 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 23 Jan 2013 15:29:02 -0800 Subject: [PATCH 190/291] Added pruntSplits method to RDD. 
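For reference, a minimal usage sketch of the new method; it mirrors the test added below, assumes an existing SparkContext `sc`, and note that a later patch in this series renames this API to PartitionPruningRDD:

    // Prune partitions by index so no tasks are launched on the dropped splits.
    val data = sc.parallelize(1 to 10, 10)                  // ten splits, indices 0 through 9
    val pruned = data.pruneSplits(splitNum => splitNum > 8) // keeps only the split with index 9
    assert(pruned.splits.size === 1)
    assert(pruned.collect().toList === List(10))
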
--- core/src/main/scala/spark/RDD.scala | 10 ++++++++ .../scala/spark/rdd/SplitsPruningRDD.scala | 24 +++++++++++++++++++ core/src/test/scala/spark/RDDSuite.scala | 22 +++++++++++------ 3 files changed, 49 insertions(+), 7 deletions(-) create mode 100644 core/src/main/scala/spark/rdd/SplitsPruningRDD.scala diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index e0d2eabb1d..3d93ff33bb 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -40,6 +40,7 @@ import spark.rdd.MapPartitionsRDD import spark.rdd.MapPartitionsWithSplitRDD import spark.rdd.PipedRDD import spark.rdd.SampledRDD +import spark.rdd.SplitsPruningRDD import spark.rdd.UnionRDD import spark.rdd.ZippedRDD import spark.storage.StorageLevel @@ -543,6 +544,15 @@ abstract class RDD[T: ClassManifest]( map(x => (f(x), x)) } + /** + * Prune splits (partitions) so Spark can avoid launching tasks on + * all splits. An example use case: If we know the RDD is partitioned by range, + * and the execution DAG has a filter on the key, we can avoid launching tasks + * on splits that don't have the range covering the key. + */ + def pruneSplits(splitsFilterFunc: Int => Boolean): RDD[T] = + new SplitsPruningRDD(this, splitsFilterFunc) + /** A private method for tests, to look at the contents of each partition */ private[spark] def collectPartitions(): Array[Array[T]] = { sc.runJob(this, (iter: Iterator[T]) => iter.toArray) diff --git a/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala b/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala new file mode 100644 index 0000000000..74e10265fc --- /dev/null +++ b/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala @@ -0,0 +1,24 @@ +package spark.rdd + +import spark.{OneToOneDependency, RDD, SparkEnv, Split, TaskContext} + +/** + * A RDD used to prune RDD splits so we can avoid launching tasks on + * all splits. An example use case: If we know the RDD is partitioned by range, + * and the execution DAG has a filter on the key, we can avoid launching tasks + * on splits that don't have the range covering the key. 
+ */ +class SplitsPruningRDD[T: ClassManifest]( + prev: RDD[T], + @transient splitsFilterFunc: Int => Boolean) + extends RDD[T](prev) { + + @transient + val _splits: Array[Split] = prev.splits.filter(s => splitsFilterFunc(s.index)) + + override def compute(split: Split, context: TaskContext) = prev.iterator(split, context) + + override protected def getSplits = _splits + + override val partitioner = prev.partitioner +} diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index db217f8482..03aa2845f4 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -1,11 +1,9 @@ package spark import scala.collection.mutable.HashMap -import org.scalatest.FunSuite -import org.scalatest.BeforeAndAfter - +import org.scalatest.{BeforeAndAfter, FunSuite} +import spark.SparkContext._ import spark.rdd.CoalescedRDD -import SparkContext._ class RDDSuite extends FunSuite with BeforeAndAfter { @@ -104,7 +102,7 @@ class RDDSuite extends FunSuite with BeforeAndAfter { } test("caching with failures") { - sc = new SparkContext("local", "test") + sc = new SparkContext("local", "test") val onlySplit = new Split { override def index: Int = 0 } var shouldFail = true val rdd = new RDD[Int](sc, Nil) { @@ -136,8 +134,10 @@ class RDDSuite extends FunSuite with BeforeAndAfter { List(List(1, 2, 3, 4, 5), List(6, 7, 8, 9, 10))) // Check that the narrow dependency is also specified correctly - assert(coalesced1.dependencies.head.asInstanceOf[NarrowDependency[_]].getParents(0).toList === List(0, 1, 2, 3, 4)) - assert(coalesced1.dependencies.head.asInstanceOf[NarrowDependency[_]].getParents(1).toList === List(5, 6, 7, 8, 9)) + assert(coalesced1.dependencies.head.asInstanceOf[NarrowDependency[_]].getParents(0).toList === + List(0, 1, 2, 3, 4)) + assert(coalesced1.dependencies.head.asInstanceOf[NarrowDependency[_]].getParents(1).toList === + List(5, 6, 7, 8, 9)) val coalesced2 = new CoalescedRDD(data, 3) assert(coalesced2.collect().toList === (1 to 10).toList) @@ -168,4 +168,12 @@ class RDDSuite extends FunSuite with BeforeAndAfter { nums.zip(sc.parallelize(1 to 4, 1)).collect() } } + + test("split pruning") { + sc = new SparkContext("local", "test") + val data = sc.parallelize(1 to 10, 10) + // Note that split number starts from 0, so > 8 means only 10th partition left. + val prunedData = data.pruneSplits(splitNum => splitNum > 8).collect + assert(prunedData.size == 1 && prunedData(0) == 10) + } } From c24b3819dd474e13d6098150c174b2e7e4bc6498 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 23 Jan 2013 15:34:59 -0800 Subject: [PATCH 191/291] Added an extra assert for split size check. --- core/src/test/scala/spark/RDDSuite.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 03aa2845f4..ef74c99246 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -173,7 +173,10 @@ class RDDSuite extends FunSuite with BeforeAndAfter { sc = new SparkContext("local", "test") val data = sc.parallelize(1 to 10, 10) // Note that split number starts from 0, so > 8 means only 10th partition left. 
- val prunedData = data.pruneSplits(splitNum => splitNum > 8).collect - assert(prunedData.size == 1 && prunedData(0) == 10) + val prunedRdd = data.pruneSplits(splitNum => splitNum > 8) + assert(prunedRdd.splits.size == 1) + val prunedData = prunedRdd.collect + assert(prunedData.size == 1) + assert(prunedData(0) == 10) } } From 45cd50d5fe40869cdc237157e073cfb5ac47b27c Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 23 Jan 2013 16:06:58 -0800 Subject: [PATCH 192/291] Updated assert == to ===. --- core/src/test/scala/spark/RDDSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index ef74c99246..5a3a12dfff 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -174,9 +174,9 @@ class RDDSuite extends FunSuite with BeforeAndAfter { val data = sc.parallelize(1 to 10, 10) // Note that split number starts from 0, so > 8 means only 10th partition left. val prunedRdd = data.pruneSplits(splitNum => splitNum > 8) - assert(prunedRdd.splits.size == 1) + assert(prunedRdd.splits.size === 1) val prunedData = prunedRdd.collect - assert(prunedData.size == 1) - assert(prunedData(0) == 10) + assert(prunedData.size === 1) + assert(prunedData(0) === 10) } } From 636e912f3289e422be9550752f5279d519062b75 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 23 Jan 2013 21:21:55 -0800 Subject: [PATCH 193/291] Created a PruneDependency to properly assign dependency for SplitsPruningRDD. --- core/src/main/scala/spark/Dependency.scala | 24 ++++++++++++++++--- .../scala/spark/rdd/SplitsPruningRDD.scala | 8 +++---- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/spark/Dependency.scala b/core/src/main/scala/spark/Dependency.scala index b85d2732db..7d5858e88e 100644 --- a/core/src/main/scala/spark/Dependency.scala +++ b/core/src/main/scala/spark/Dependency.scala @@ -5,6 +5,7 @@ package spark */ abstract class Dependency[T](val rdd: RDD[T]) extends Serializable + /** * Base class for dependencies where each partition of the parent RDD is used by at most one * partition of the child RDD. Narrow dependencies allow for pipelined execution. @@ -12,12 +13,13 @@ abstract class Dependency[T](val rdd: RDD[T]) extends Serializable abstract class NarrowDependency[T](rdd: RDD[T]) extends Dependency(rdd) { /** * Get the parent partitions for a child partition. - * @param outputPartition a partition of the child RDD + * @param partitionId a partition of the child RDD * @return the partitions of the parent RDD that the child partition depends upon */ - def getParents(outputPartition: Int): Seq[Int] + def getParents(partitionId: Int): Seq[Int] } + /** * Represents a dependency on the output of a shuffle stage. * @param shuffleId the shuffle id @@ -32,6 +34,7 @@ class ShuffleDependency[K, V]( val shuffleId: Int = rdd.context.newShuffleId() } + /** * Represents a one-to-one dependency between partitions of the parent and child RDDs. */ @@ -39,6 +42,7 @@ class OneToOneDependency[T](rdd: RDD[T]) extends NarrowDependency[T](rdd) { override def getParents(partitionId: Int) = List(partitionId) } + /** * Represents a one-to-one dependency between ranges of partitions in the parent and child RDDs. 
* @param rdd the parent RDD @@ -48,7 +52,7 @@ class OneToOneDependency[T](rdd: RDD[T]) extends NarrowDependency[T](rdd) { */ class RangeDependency[T](rdd: RDD[T], inStart: Int, outStart: Int, length: Int) extends NarrowDependency[T](rdd) { - + override def getParents(partitionId: Int) = { if (partitionId >= outStart && partitionId < outStart + length) { List(partitionId - outStart + inStart) @@ -57,3 +61,17 @@ class RangeDependency[T](rdd: RDD[T], inStart: Int, outStart: Int, length: Int) } } } + + +/** + * Represents a dependency between the SplitsPruningRDD and its parent. In this + * case, the child RDD contains a subset of splits of the parents'. + */ +class PruneDependency[T](rdd: RDD[T], @transient splitsFilterFunc: Int => Boolean) + extends NarrowDependency[T](rdd) { + + @transient + val splits: Array[Split] = rdd.splits.filter(s => splitsFilterFunc(s.index)) + + override def getParents(partitionId: Int) = List(splits(partitionId).index) +} diff --git a/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala b/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala index 74e10265fc..7b44d85bb5 100644 --- a/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala +++ b/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala @@ -1,6 +1,6 @@ package spark.rdd -import spark.{OneToOneDependency, RDD, SparkEnv, Split, TaskContext} +import spark.{PruneDependency, RDD, SparkEnv, Split, TaskContext} /** * A RDD used to prune RDD splits so we can avoid launching tasks on @@ -11,12 +11,12 @@ import spark.{OneToOneDependency, RDD, SparkEnv, Split, TaskContext} class SplitsPruningRDD[T: ClassManifest]( prev: RDD[T], @transient splitsFilterFunc: Int => Boolean) - extends RDD[T](prev) { + extends RDD[T](prev.context, List(new PruneDependency(prev, splitsFilterFunc))) { @transient - val _splits: Array[Split] = prev.splits.filter(s => splitsFilterFunc(s.index)) + val _splits: Array[Split] = dependencies_.head.asInstanceOf[PruneDependency[T]].splits - override def compute(split: Split, context: TaskContext) = prev.iterator(split, context) + override def compute(split: Split, context: TaskContext) = firstParent[T].iterator(split, context) override protected def getSplits = _splits From 81004b967e838fca0790727a3fea5a265ddbc69a Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 23 Jan 2013 21:54:27 -0800 Subject: [PATCH 194/291] Marked prev RDD as transient in SplitsPruningRDD. --- core/src/main/scala/spark/rdd/SplitsPruningRDD.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala b/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala index 7b44d85bb5..9b1a210ba3 100644 --- a/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala +++ b/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala @@ -9,7 +9,7 @@ import spark.{PruneDependency, RDD, SparkEnv, Split, TaskContext} * on splits that don't have the range covering the key. 
*/ class SplitsPruningRDD[T: ClassManifest]( - prev: RDD[T], + @transient prev: RDD[T], @transient splitsFilterFunc: Int => Boolean) extends RDD[T](prev.context, List(new PruneDependency(prev, splitsFilterFunc))) { @@ -20,5 +20,5 @@ class SplitsPruningRDD[T: ClassManifest]( override protected def getSplits = _splits - override val partitioner = prev.partitioner + override val partitioner = firstParent[T].partitioner } From eedc542a0276a5248c81446ee84f56d691e5f488 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 23 Jan 2013 22:14:23 -0800 Subject: [PATCH 195/291] Removed pruneSplits method in RDD and renamed SplitsPruningRDD to PartitionPruningRDD. --- core/src/main/scala/spark/RDD.scala | 10 -------- .../scala/spark/rdd/PartitionPruningRDD.scala | 24 +++++++++++++++++++ .../scala/spark/rdd/SplitsPruningRDD.scala | 24 ------------------- core/src/test/scala/spark/RDDSuite.scala | 6 ++--- 4 files changed, 27 insertions(+), 37 deletions(-) create mode 100644 core/src/main/scala/spark/rdd/PartitionPruningRDD.scala delete mode 100644 core/src/main/scala/spark/rdd/SplitsPruningRDD.scala diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 3d93ff33bb..e0d2eabb1d 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -40,7 +40,6 @@ import spark.rdd.MapPartitionsRDD import spark.rdd.MapPartitionsWithSplitRDD import spark.rdd.PipedRDD import spark.rdd.SampledRDD -import spark.rdd.SplitsPruningRDD import spark.rdd.UnionRDD import spark.rdd.ZippedRDD import spark.storage.StorageLevel @@ -544,15 +543,6 @@ abstract class RDD[T: ClassManifest]( map(x => (f(x), x)) } - /** - * Prune splits (partitions) so Spark can avoid launching tasks on - * all splits. An example use case: If we know the RDD is partitioned by range, - * and the execution DAG has a filter on the key, we can avoid launching tasks - * on splits that don't have the range covering the key. - */ - def pruneSplits(splitsFilterFunc: Int => Boolean): RDD[T] = - new SplitsPruningRDD(this, splitsFilterFunc) - /** A private method for tests, to look at the contents of each partition */ private[spark] def collectPartitions(): Array[Array[T]] = { sc.runJob(this, (iter: Iterator[T]) => iter.toArray) diff --git a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala new file mode 100644 index 0000000000..3048949ef2 --- /dev/null +++ b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala @@ -0,0 +1,24 @@ +package spark.rdd + +import spark.{PruneDependency, RDD, SparkEnv, Split, TaskContext} + +/** + * A RDD used to prune RDD partitions/splits so we can avoid launching tasks on + * all partitions. An example use case: If we know the RDD is partitioned by range, + * and the execution DAG has a filter on the key, we can avoid launching tasks + * on partitions that don't have the range covering the key. 
+ */ +class PartitionPruningRDD[T: ClassManifest]( + @transient prev: RDD[T], + @transient partitionFilterFunc: Int => Boolean) + extends RDD[T](prev.context, List(new PruneDependency(prev, partitionFilterFunc))) { + + @transient + val partitions_ : Array[Split] = dependencies_.head.asInstanceOf[PruneDependency[T]].splits + + override def compute(split: Split, context: TaskContext) = firstParent[T].iterator(split, context) + + override protected def getSplits = partitions_ + + override val partitioner = firstParent[T].partitioner +} diff --git a/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala b/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala deleted file mode 100644 index 9b1a210ba3..0000000000 --- a/core/src/main/scala/spark/rdd/SplitsPruningRDD.scala +++ /dev/null @@ -1,24 +0,0 @@ -package spark.rdd - -import spark.{PruneDependency, RDD, SparkEnv, Split, TaskContext} - -/** - * A RDD used to prune RDD splits so we can avoid launching tasks on - * all splits. An example use case: If we know the RDD is partitioned by range, - * and the execution DAG has a filter on the key, we can avoid launching tasks - * on splits that don't have the range covering the key. - */ -class SplitsPruningRDD[T: ClassManifest]( - @transient prev: RDD[T], - @transient splitsFilterFunc: Int => Boolean) - extends RDD[T](prev.context, List(new PruneDependency(prev, splitsFilterFunc))) { - - @transient - val _splits: Array[Split] = dependencies_.head.asInstanceOf[PruneDependency[T]].splits - - override def compute(split: Split, context: TaskContext) = firstParent[T].iterator(split, context) - - override protected def getSplits = _splits - - override val partitioner = firstParent[T].partitioner -} diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 5a3a12dfff..73846131a9 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -3,7 +3,7 @@ package spark import scala.collection.mutable.HashMap import org.scalatest.{BeforeAndAfter, FunSuite} import spark.SparkContext._ -import spark.rdd.CoalescedRDD +import spark.rdd.{CoalescedRDD, PartitionPruningRDD} class RDDSuite extends FunSuite with BeforeAndAfter { @@ -169,11 +169,11 @@ class RDDSuite extends FunSuite with BeforeAndAfter { } } - test("split pruning") { + test("partition pruning") { sc = new SparkContext("local", "test") val data = sc.parallelize(1 to 10, 10) // Note that split number starts from 0, so > 8 means only 10th partition left. - val prunedRdd = data.pruneSplits(splitNum => splitNum > 8) + val prunedRdd = new PartitionPruningRDD(data, splitNum => splitNum > 8) assert(prunedRdd.splits.size === 1) val prunedData = prunedRdd.collect assert(prunedData.size === 1) From c109f29c97c9606dee45e6300d01a272dbb560aa Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 23 Jan 2013 22:22:03 -0800 Subject: [PATCH 196/291] Updated PruneDependency to change "split" to "partition". --- core/src/main/scala/spark/Dependency.scala | 10 +++++----- .../src/main/scala/spark/rdd/PartitionPruningRDD.scala | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/spark/Dependency.scala b/core/src/main/scala/spark/Dependency.scala index 7d5858e88e..647aee6eb5 100644 --- a/core/src/main/scala/spark/Dependency.scala +++ b/core/src/main/scala/spark/Dependency.scala @@ -64,14 +64,14 @@ class RangeDependency[T](rdd: RDD[T], inStart: Int, outStart: Int, length: Int) /** - * Represents a dependency between the SplitsPruningRDD and its parent. 
In this - * case, the child RDD contains a subset of splits of the parents'. + * Represents a dependency between the PartitionPruningRDD and its parent. In this + * case, the child RDD contains a subset of partitions of the parents'. */ -class PruneDependency[T](rdd: RDD[T], @transient splitsFilterFunc: Int => Boolean) +class PruneDependency[T](rdd: RDD[T], @transient partitionFilterFunc: Int => Boolean) extends NarrowDependency[T](rdd) { @transient - val splits: Array[Split] = rdd.splits.filter(s => splitsFilterFunc(s.index)) + val partitions: Array[Split] = rdd.splits.filter(s => partitionFilterFunc(s.index)) - override def getParents(partitionId: Int) = List(splits(partitionId).index) + override def getParents(partitionId: Int) = List(partitions(partitionId).index) } diff --git a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala index 3048949ef2..787b59ae8c 100644 --- a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala +++ b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala @@ -14,7 +14,7 @@ class PartitionPruningRDD[T: ClassManifest]( extends RDD[T](prev.context, List(new PruneDependency(prev, partitionFilterFunc))) { @transient - val partitions_ : Array[Split] = dependencies_.head.asInstanceOf[PruneDependency[T]].splits + val partitions_ : Array[Split] = dependencies_.head.asInstanceOf[PruneDependency[T]].partitions override def compute(split: Split, context: TaskContext) = firstParent[T].iterator(split, context) From 67a43bc7e622e4dd9d53ccf80b441740d6ff4df5 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 23 Jan 2013 23:06:52 -0800 Subject: [PATCH 197/291] Added a clearDependencies method in PartitionPruningRDD. --- core/src/main/scala/spark/rdd/PartitionPruningRDD.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala index 787b59ae8c..97dd37950e 100644 --- a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala +++ b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala @@ -14,11 +14,16 @@ class PartitionPruningRDD[T: ClassManifest]( extends RDD[T](prev.context, List(new PruneDependency(prev, partitionFilterFunc))) { @transient - val partitions_ : Array[Split] = dependencies_.head.asInstanceOf[PruneDependency[T]].partitions + var partitions_ : Array[Split] = dependencies_.head.asInstanceOf[PruneDependency[T]].partitions override def compute(split: Split, context: TaskContext) = firstParent[T].iterator(split, context) override protected def getSplits = partitions_ override val partitioner = firstParent[T].partitioner + + override def clearDependencies() { + super.clearDependencies() + partitions_ = null + } } From 230bda204778e6f3c0f5a20ad341f643146d97cb Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 15 Jan 2013 14:01:19 -0600 Subject: [PATCH 198/291] Add LocalSparkContext to manage common sc variable. 
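For reference, a minimal sketch of a suite written against the new trait; the pattern mirrors the refactored suites below, and the name `ExampleSuite` is a placeholder:

    import org.scalatest.FunSuite
    import spark.{LocalSparkContext, SparkContext}

    // LocalSparkContext supplies a managed `sc`, stops it after each test, and
    // clears spark.master.port so Akka can rebind on the next test.
    class ExampleSuite extends FunSuite with LocalSparkContext {
      test("basic count") {
        sc = new SparkContext("local", "test")
        assert(sc.parallelize(1 to 4, 2).count() === 4)
      }
    }

The companion object's withSpark helper covers one-off contexts, as used in ClosureCleanerSuite below.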
--- .../test/scala/spark/AccumulatorSuite.scala | 32 ++------ .../src/test/scala/spark/BroadcastSuite.scala | 14 +--- .../test/scala/spark/CheckpointSuite.scala | 19 ++--- .../scala/spark/ClosureCleanerSuite.scala | 73 +++++++++---------- .../test/scala/spark/DistributedSuite.scala | 23 ++---- core/src/test/scala/spark/FailureSuite.scala | 14 +--- .../test/scala/spark/FileServerSuite.scala | 16 ++-- core/src/test/scala/spark/FileSuite.scala | 16 +--- .../test/scala/spark/LocalSparkContext.scala | 41 +++++++++++ .../scala/spark/MapOutputTrackerSuite.scala | 7 +- .../test/scala/spark/PartitioningSuite.scala | 15 +--- core/src/test/scala/spark/PipedRDDSuite.scala | 16 +--- core/src/test/scala/spark/RDDSuite.scala | 14 +--- core/src/test/scala/spark/ShuffleSuite.scala | 14 +--- core/src/test/scala/spark/SortingSuite.scala | 13 +--- .../src/test/scala/spark/ThreadingSuite.scala | 14 +--- .../spark/scheduler/TaskContextSuite.scala | 14 +--- 17 files changed, 109 insertions(+), 246 deletions(-) create mode 100644 core/src/test/scala/spark/LocalSparkContext.scala diff --git a/core/src/test/scala/spark/AccumulatorSuite.scala b/core/src/test/scala/spark/AccumulatorSuite.scala index d8be99dde7..78d64a44ae 100644 --- a/core/src/test/scala/spark/AccumulatorSuite.scala +++ b/core/src/test/scala/spark/AccumulatorSuite.scala @@ -1,6 +1,5 @@ package spark -import org.scalatest.BeforeAndAfter import org.scalatest.FunSuite import org.scalatest.matchers.ShouldMatchers import collection.mutable @@ -9,18 +8,7 @@ import scala.math.exp import scala.math.signum import spark.SparkContext._ -class AccumulatorSuite extends FunSuite with ShouldMatchers with BeforeAndAfter { - - var sc: SparkContext = null - - after { - if (sc != null) { - sc.stop() - sc = null - } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") - } +class AccumulatorSuite extends FunSuite with ShouldMatchers with LocalSparkContext { test ("basic accumulation"){ sc = new SparkContext("local", "test") @@ -53,10 +41,7 @@ class AccumulatorSuite extends FunSuite with ShouldMatchers with BeforeAndAfter for (i <- 1 to maxI) { v should contain(i) } - sc.stop() - sc = null - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") + resetSparkContext() } } @@ -86,10 +71,7 @@ class AccumulatorSuite extends FunSuite with ShouldMatchers with BeforeAndAfter x => acc.value += x } } should produce [SparkException] - sc.stop() - sc = null - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") + resetSparkContext() } } @@ -115,10 +97,7 @@ class AccumulatorSuite extends FunSuite with ShouldMatchers with BeforeAndAfter bufferAcc.value should contain(i) mapAcc.value should contain (i -> i.toString) } - sc.stop() - sc = null - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") + resetSparkContext() } } @@ -134,8 +113,7 @@ class AccumulatorSuite extends FunSuite with ShouldMatchers with BeforeAndAfter x => acc.localValue ++= x } acc.value should be ( (0 to maxI).toSet) - sc.stop() - sc = null + resetSparkContext() } } diff --git a/core/src/test/scala/spark/BroadcastSuite.scala b/core/src/test/scala/spark/BroadcastSuite.scala index 2d3302f0aa..362a31fb0d 100644 --- a/core/src/test/scala/spark/BroadcastSuite.scala +++ 
b/core/src/test/scala/spark/BroadcastSuite.scala @@ -1,20 +1,8 @@ package spark import org.scalatest.FunSuite -import org.scalatest.BeforeAndAfter -class BroadcastSuite extends FunSuite with BeforeAndAfter { - - var sc: SparkContext = _ - - after { - if (sc != null) { - sc.stop() - sc = null - } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") - } +class BroadcastSuite extends FunSuite with LocalSparkContext { test("basic broadcast") { sc = new SparkContext("local", "test") diff --git a/core/src/test/scala/spark/CheckpointSuite.scala b/core/src/test/scala/spark/CheckpointSuite.scala index 51573254ca..33c317720c 100644 --- a/core/src/test/scala/spark/CheckpointSuite.scala +++ b/core/src/test/scala/spark/CheckpointSuite.scala @@ -1,34 +1,27 @@ package spark -import org.scalatest.{BeforeAndAfter, FunSuite} +import org.scalatest.FunSuite import java.io.File import spark.rdd._ import spark.SparkContext._ import storage.StorageLevel -class CheckpointSuite extends FunSuite with BeforeAndAfter with Logging { +class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { initLogging() - var sc: SparkContext = _ var checkpointDir: File = _ val partitioner = new HashPartitioner(2) - before { + override def beforeEach() { + super.beforeEach() checkpointDir = File.createTempFile("temp", "") checkpointDir.delete() - sc = new SparkContext("local", "test") sc.setCheckpointDir(checkpointDir.toString) } - after { - if (sc != null) { - sc.stop() - sc = null - } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") - + override def afterEach() { + super.afterEach() if (checkpointDir != null) { checkpointDir.delete() } diff --git a/core/src/test/scala/spark/ClosureCleanerSuite.scala b/core/src/test/scala/spark/ClosureCleanerSuite.scala index dfa2de80e6..b2d0dd4627 100644 --- a/core/src/test/scala/spark/ClosureCleanerSuite.scala +++ b/core/src/test/scala/spark/ClosureCleanerSuite.scala @@ -3,6 +3,7 @@ package spark import java.io.NotSerializableException import org.scalatest.FunSuite +import spark.LocalSparkContext._ import SparkContext._ class ClosureCleanerSuite extends FunSuite { @@ -43,13 +44,10 @@ object TestObject { def run(): Int = { var nonSer = new NonSerializable var x = 5 - val sc = new SparkContext("local", "test") - val nums = sc.parallelize(Array(1, 2, 3, 4)) - val answer = nums.map(_ + x).reduce(_ + _) - sc.stop() - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") - return answer + return withSpark(new SparkContext("local", "test")) { sc => + val nums = sc.parallelize(Array(1, 2, 3, 4)) + nums.map(_ + x).reduce(_ + _) + } } } @@ -60,11 +58,10 @@ class TestClass extends Serializable { def run(): Int = { var nonSer = new NonSerializable - val sc = new SparkContext("local", "test") - val nums = sc.parallelize(Array(1, 2, 3, 4)) - val answer = nums.map(_ + getX).reduce(_ + _) - sc.stop() - return answer + return withSpark(new SparkContext("local", "test")) { sc => + val nums = sc.parallelize(Array(1, 2, 3, 4)) + nums.map(_ + getX).reduce(_ + _) + } } } @@ -73,11 +70,10 @@ class TestClassWithoutDefaultConstructor(x: Int) extends Serializable { def run(): Int = { var nonSer = new NonSerializable - val sc = new SparkContext("local", "test") - val nums = sc.parallelize(Array(1, 2, 3, 4)) - val answer = nums.map(_ + getX).reduce(_ + 
_) - sc.stop() - return answer + return withSpark(new SparkContext("local", "test")) { sc => + val nums = sc.parallelize(Array(1, 2, 3, 4)) + nums.map(_ + getX).reduce(_ + _) + } } } @@ -89,11 +85,10 @@ class TestClassWithoutFieldAccess { def run(): Int = { var nonSer2 = new NonSerializable var x = 5 - val sc = new SparkContext("local", "test") - val nums = sc.parallelize(Array(1, 2, 3, 4)) - val answer = nums.map(_ + x).reduce(_ + _) - sc.stop() - return answer + return withSpark(new SparkContext("local", "test")) { sc => + val nums = sc.parallelize(Array(1, 2, 3, 4)) + nums.map(_ + x).reduce(_ + _) + } } } @@ -102,16 +97,16 @@ object TestObjectWithNesting { def run(): Int = { var nonSer = new NonSerializable var answer = 0 - val sc = new SparkContext("local", "test") - val nums = sc.parallelize(Array(1, 2, 3, 4)) - var y = 1 - for (i <- 1 to 4) { - var nonSer2 = new NonSerializable - var x = i - answer += nums.map(_ + x + y).reduce(_ + _) + return withSpark(new SparkContext("local", "test")) { sc => + val nums = sc.parallelize(Array(1, 2, 3, 4)) + var y = 1 + for (i <- 1 to 4) { + var nonSer2 = new NonSerializable + var x = i + answer += nums.map(_ + x + y).reduce(_ + _) + } + answer } - sc.stop() - return answer } } @@ -121,14 +116,14 @@ class TestClassWithNesting(val y: Int) extends Serializable { def run(): Int = { var nonSer = new NonSerializable var answer = 0 - val sc = new SparkContext("local", "test") - val nums = sc.parallelize(Array(1, 2, 3, 4)) - for (i <- 1 to 4) { - var nonSer2 = new NonSerializable - var x = i - answer += nums.map(_ + x + getY).reduce(_ + _) + return withSpark(new SparkContext("local", "test")) { sc => + val nums = sc.parallelize(Array(1, 2, 3, 4)) + for (i <- 1 to 4) { + var nonSer2 = new NonSerializable + var x = i + answer += nums.map(_ + x + getY).reduce(_ + _) + } + answer } - sc.stop() - return answer } } diff --git a/core/src/test/scala/spark/DistributedSuite.scala b/core/src/test/scala/spark/DistributedSuite.scala index cacc2796b6..83a2a549a9 100644 --- a/core/src/test/scala/spark/DistributedSuite.scala +++ b/core/src/test/scala/spark/DistributedSuite.scala @@ -15,41 +15,28 @@ import scala.collection.mutable.ArrayBuffer import SparkContext._ import storage.StorageLevel -class DistributedSuite extends FunSuite with ShouldMatchers with BeforeAndAfter { +class DistributedSuite extends FunSuite with ShouldMatchers with BeforeAndAfter with LocalSparkContext { val clusterUrl = "local-cluster[2,1,512]" - @transient var sc: SparkContext = _ - after { - if (sc != null) { - sc.stop() - sc = null - } System.clearProperty("spark.reducer.maxMbInFlight") System.clearProperty("spark.storage.memoryFraction") - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") } test("local-cluster format") { sc = new SparkContext("local-cluster[2,1,512]", "test") assert(sc.parallelize(1 to 2, 2).count() == 2) - sc.stop() - System.clearProperty("spark.master.port") + resetSparkContext() sc = new SparkContext("local-cluster[2 , 1 , 512]", "test") assert(sc.parallelize(1 to 2, 2).count() == 2) - sc.stop() - System.clearProperty("spark.master.port") + resetSparkContext() sc = new SparkContext("local-cluster[2, 1, 512]", "test") assert(sc.parallelize(1 to 2, 2).count() == 2) - sc.stop() - System.clearProperty("spark.master.port") + resetSparkContext() sc = new SparkContext("local-cluster[ 2, 1, 512 ]", "test") assert(sc.parallelize(1 to 2, 2).count() == 2) - sc.stop() - 
System.clearProperty("spark.master.port") - sc = null + resetSparkContext() } test("simple groupByKey") { diff --git a/core/src/test/scala/spark/FailureSuite.scala b/core/src/test/scala/spark/FailureSuite.scala index a3454f25f6..8c1445a465 100644 --- a/core/src/test/scala/spark/FailureSuite.scala +++ b/core/src/test/scala/spark/FailureSuite.scala @@ -1,7 +1,6 @@ package spark import org.scalatest.FunSuite -import org.scalatest.BeforeAndAfter import org.scalatest.prop.Checkers import scala.collection.mutable.ArrayBuffer @@ -23,18 +22,7 @@ object FailureSuiteState { } } -class FailureSuite extends FunSuite with BeforeAndAfter { - - var sc: SparkContext = _ - - after { - if (sc != null) { - sc.stop() - sc = null - } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") - } +class FailureSuite extends FunSuite with LocalSparkContext { // Run a 3-task map job in which task 1 deterministically fails once, and check // whether the job completes successfully and we ran 4 tasks in total. diff --git a/core/src/test/scala/spark/FileServerSuite.scala b/core/src/test/scala/spark/FileServerSuite.scala index b4283d9604..8215cbde02 100644 --- a/core/src/test/scala/spark/FileServerSuite.scala +++ b/core/src/test/scala/spark/FileServerSuite.scala @@ -2,17 +2,16 @@ package spark import com.google.common.io.Files import org.scalatest.FunSuite -import org.scalatest.BeforeAndAfter import java.io.{File, PrintWriter, FileReader, BufferedReader} import SparkContext._ -class FileServerSuite extends FunSuite with BeforeAndAfter { +class FileServerSuite extends FunSuite with LocalSparkContext { - @transient var sc: SparkContext = _ @transient var tmpFile : File = _ @transient var testJarFile : File = _ - before { + override def beforeEach() { + super.beforeEach() // Create a sample text file val tmpdir = new File(Files.createTempDir(), "test") tmpdir.mkdir() @@ -22,17 +21,12 @@ class FileServerSuite extends FunSuite with BeforeAndAfter { pw.close() } - after { - if (sc != null) { - sc.stop() - sc = null - } + override def afterEach() { + super.afterEach() // Clean up downloaded file if (tmpFile.exists) { tmpFile.delete() } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") } test("Distributing files locally") { diff --git a/core/src/test/scala/spark/FileSuite.scala b/core/src/test/scala/spark/FileSuite.scala index 554bea53a9..91b48c7456 100644 --- a/core/src/test/scala/spark/FileSuite.scala +++ b/core/src/test/scala/spark/FileSuite.scala @@ -6,24 +6,12 @@ import scala.io.Source import com.google.common.io.Files import org.scalatest.FunSuite -import org.scalatest.BeforeAndAfter import org.apache.hadoop.io._ import SparkContext._ -class FileSuite extends FunSuite with BeforeAndAfter { - - var sc: SparkContext = _ - - after { - if (sc != null) { - sc.stop() - sc = null - } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") - } - +class FileSuite extends FunSuite with LocalSparkContext { + test("text files") { sc = new SparkContext("local", "test") val tempDir = Files.createTempDir() diff --git a/core/src/test/scala/spark/LocalSparkContext.scala b/core/src/test/scala/spark/LocalSparkContext.scala new file mode 100644 index 0000000000..b5e31ddae3 --- /dev/null +++ b/core/src/test/scala/spark/LocalSparkContext.scala @@ -0,0 +1,41 @@ +package spark + +import 
org.scalatest.Suite +import org.scalatest.BeforeAndAfterEach + +/** Manages a local `sc` {@link SparkContext} variable, correctly stopping it after each test. */ +trait LocalSparkContext extends BeforeAndAfterEach { self: Suite => + + @transient var sc: SparkContext = _ + + override def afterEach() { + resetSparkContext() + super.afterEach() + } + + def resetSparkContext() = { + if (sc != null) { + LocalSparkContext.stop(sc) + sc = null + } + } + +} + +object LocalSparkContext { + def stop(sc: SparkContext) { + sc.stop() + // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown + System.clearProperty("spark.master.port") + } + + /** Runs `f` by passing in `sc` and ensures that `sc` is stopped. */ + def withSpark[T](sc: SparkContext)(f: SparkContext => T) = { + try { + f(sc) + } finally { + stop(sc) + } + } + +} \ No newline at end of file diff --git a/core/src/test/scala/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/spark/MapOutputTrackerSuite.scala index d3dd3a8fa4..774bbd65b1 100644 --- a/core/src/test/scala/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/spark/MapOutputTrackerSuite.scala @@ -1,17 +1,13 @@ package spark import org.scalatest.FunSuite -import org.scalatest.BeforeAndAfter import akka.actor._ import spark.scheduler.MapStatus import spark.storage.BlockManagerId import spark.util.AkkaUtils -class MapOutputTrackerSuite extends FunSuite with BeforeAndAfter { - after { - System.clearProperty("spark.master.port") - } +class MapOutputTrackerSuite extends FunSuite with LocalSparkContext { test("compressSize") { assert(MapOutputTracker.compressSize(0L) === 0) @@ -81,7 +77,6 @@ class MapOutputTrackerSuite extends FunSuite with BeforeAndAfter { } test("remote fetch") { - System.clearProperty("spark.master.host") val (actorSystem, boundPort) = AkkaUtils.createActorSystem("test", "localhost", 0) System.setProperty("spark.master.port", boundPort.toString) diff --git a/core/src/test/scala/spark/PartitioningSuite.scala b/core/src/test/scala/spark/PartitioningSuite.scala index eb3c8f238f..af1107cd19 100644 --- a/core/src/test/scala/spark/PartitioningSuite.scala +++ b/core/src/test/scala/spark/PartitioningSuite.scala @@ -1,25 +1,12 @@ package spark import org.scalatest.FunSuite -import org.scalatest.BeforeAndAfter import scala.collection.mutable.ArrayBuffer import SparkContext._ -class PartitioningSuite extends FunSuite with BeforeAndAfter { - - var sc: SparkContext = _ - - after { - if(sc != null) { - sc.stop() - sc = null - } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") - } - +class PartitioningSuite extends FunSuite with LocalSparkContext { test("HashPartitioner equality") { val p2 = new HashPartitioner(2) diff --git a/core/src/test/scala/spark/PipedRDDSuite.scala b/core/src/test/scala/spark/PipedRDDSuite.scala index 9b84b29227..a6344edf8f 100644 --- a/core/src/test/scala/spark/PipedRDDSuite.scala +++ b/core/src/test/scala/spark/PipedRDDSuite.scala @@ -1,21 +1,9 @@ package spark import org.scalatest.FunSuite -import org.scalatest.BeforeAndAfter import SparkContext._ -class PipedRDDSuite extends FunSuite with BeforeAndAfter { - - var sc: SparkContext = _ - - after { - if (sc != null) { - sc.stop() - sc = null - } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") - } +class PipedRDDSuite extends FunSuite with LocalSparkContext { test("basic pipe") { sc = 
new SparkContext("local", "test") @@ -51,5 +39,3 @@ class PipedRDDSuite extends FunSuite with BeforeAndAfter { } } - - diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index db217f8482..592427e97a 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -2,23 +2,11 @@ package spark import scala.collection.mutable.HashMap import org.scalatest.FunSuite -import org.scalatest.BeforeAndAfter import spark.rdd.CoalescedRDD import SparkContext._ -class RDDSuite extends FunSuite with BeforeAndAfter { - - var sc: SparkContext = _ - - after { - if (sc != null) { - sc.stop() - sc = null - } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") - } +class RDDSuite extends FunSuite with LocalSparkContext { test("basic operations") { sc = new SparkContext("local", "test") diff --git a/core/src/test/scala/spark/ShuffleSuite.scala b/core/src/test/scala/spark/ShuffleSuite.scala index bebb8ebe86..3493b9511f 100644 --- a/core/src/test/scala/spark/ShuffleSuite.scala +++ b/core/src/test/scala/spark/ShuffleSuite.scala @@ -3,7 +3,6 @@ package spark import scala.collection.mutable.ArrayBuffer import org.scalatest.FunSuite -import org.scalatest.BeforeAndAfter import org.scalatest.matchers.ShouldMatchers import org.scalatest.prop.Checkers import org.scalacheck.Arbitrary._ @@ -15,18 +14,7 @@ import com.google.common.io.Files import spark.rdd.ShuffledRDD import spark.SparkContext._ -class ShuffleSuite extends FunSuite with ShouldMatchers with BeforeAndAfter { - - var sc: SparkContext = _ - - after { - if (sc != null) { - sc.stop() - sc = null - } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") - } +class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { test("groupByKey") { sc = new SparkContext("local", "test") diff --git a/core/src/test/scala/spark/SortingSuite.scala b/core/src/test/scala/spark/SortingSuite.scala index 1ad11ff4c3..edb8c839fc 100644 --- a/core/src/test/scala/spark/SortingSuite.scala +++ b/core/src/test/scala/spark/SortingSuite.scala @@ -5,18 +5,7 @@ import org.scalatest.BeforeAndAfter import org.scalatest.matchers.ShouldMatchers import SparkContext._ -class SortingSuite extends FunSuite with BeforeAndAfter with ShouldMatchers with Logging { - - var sc: SparkContext = _ - - after { - if (sc != null) { - sc.stop() - sc = null - } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") - } +class SortingSuite extends FunSuite with LocalSparkContext with ShouldMatchers with Logging { test("sortByKey") { sc = new SparkContext("local", "test") diff --git a/core/src/test/scala/spark/ThreadingSuite.scala b/core/src/test/scala/spark/ThreadingSuite.scala index e9b1837d89..ff315b6693 100644 --- a/core/src/test/scala/spark/ThreadingSuite.scala +++ b/core/src/test/scala/spark/ThreadingSuite.scala @@ -22,19 +22,7 @@ object ThreadingSuiteState { } } -class ThreadingSuite extends FunSuite with BeforeAndAfter { - - var sc: SparkContext = _ - - after { - if(sc != null) { - sc.stop() - sc = null - } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") - } - +class ThreadingSuite extends FunSuite with LocalSparkContext { test("accessing SparkContext form a 
different thread") { sc = new SparkContext("local", "test") diff --git a/core/src/test/scala/spark/scheduler/TaskContextSuite.scala b/core/src/test/scala/spark/scheduler/TaskContextSuite.scala index ba6f8b588f..a5db7103f5 100644 --- a/core/src/test/scala/spark/scheduler/TaskContextSuite.scala +++ b/core/src/test/scala/spark/scheduler/TaskContextSuite.scala @@ -6,19 +6,9 @@ import spark.TaskContext import spark.RDD import spark.SparkContext import spark.Split +import spark.LocalSparkContext -class TaskContextSuite extends FunSuite with BeforeAndAfter { - - var sc: SparkContext = _ - - after { - if (sc != null) { - sc.stop() - sc = null - } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") - } +class TaskContextSuite extends FunSuite with BeforeAndAfter with LocalSparkContext { test("Calls executeOnCompleteCallbacks after failure") { var completed = false From b6fc6e67521e8a9a5291693cce3dc766da244395 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 24 Jan 2013 14:28:05 -0800 Subject: [PATCH 199/291] SPARK-541: Adding a warning for invalid Master URL Right now Spark silently parses master URL's which do not match any known regex as a Mesos URL. The Mesos error message when an invalid URL gets passed is really confusing, so this warns the user when the implicit conversion is happening. --- core/src/main/scala/spark/SparkContext.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 66bdbe7cda..bc9fdee8b6 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -112,6 +112,8 @@ class SparkContext( val LOCAL_CLUSTER_REGEX = """local-cluster\[\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*]""".r // Regular expression for connecting to Spark deploy clusters val SPARK_REGEX = """(spark://.*)""".r + //Regular expression for connection to Mesos cluster + val MESOS_REGEX = """(mesos://.*)""".r master match { case "local" => @@ -152,6 +154,9 @@ class SparkContext( scheduler case _ => + if (MESOS_REGEX.findFirstIn(master).isEmpty) { + logWarning("Master %s does not match expected format, parsing as Mesos URL".format(master)) + } MesosNativeLibrary.load() val scheduler = new ClusterScheduler(this) val coarseGrained = System.getProperty("spark.mesos.coarse", "false").toBoolean From 7dfb82a992d47491174d7929e31351d26cadfcda Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 22 Jan 2013 15:25:41 -0600 Subject: [PATCH 200/291] Replace old 'master' term with 'driver'. 
--- bagel/src/test/scala/bagel/BagelSuite.scala | 2 +- .../main/scala/spark/MapOutputTracker.scala | 10 +-- core/src/main/scala/spark/SparkContext.scala | 20 ++--- core/src/main/scala/spark/SparkEnv.scala | 22 +++--- .../spark/broadcast/BitTorrentBroadcast.scala | 24 +++--- .../scala/spark/broadcast/Broadcast.scala | 6 +- .../spark/broadcast/BroadcastFactory.scala | 4 +- .../scala/spark/broadcast/HttpBroadcast.scala | 6 +- .../scala/spark/broadcast/MultiTracker.scala | 35 +++++---- .../scala/spark/broadcast/TreeBroadcast.scala | 52 ++++++------- .../spark/deploy/LocalSparkCluster.scala | 34 ++++----- .../spark/deploy/client/ClientListener.scala | 4 +- .../scala/spark/deploy/master/JobInfo.scala | 2 +- .../scala/spark/deploy/master/Master.scala | 18 ++--- .../executor/StandaloneExecutorBackend.scala | 26 +++---- .../cluster/SparkDeploySchedulerBackend.scala | 33 +++++---- .../cluster/StandaloneClusterMessage.scala | 8 +- .../cluster/StandaloneSchedulerBackend.scala | 74 +++++++++---------- .../mesos/CoarseMesosSchedulerBackend.scala | 6 +- .../spark/storage/BlockManagerMaster.scala | 69 +++++++++-------- .../scala/spark/storage/ThreadingTest.scala | 6 +- core/src/test/scala/spark/JavaAPISuite.java | 2 +- .../test/scala/spark/LocalSparkContext.scala | 2 +- .../scala/spark/MapOutputTrackerSuite.scala | 2 +- docs/configuration.md | 12 +-- python/pyspark/tests.py | 2 +- .../src/test/scala/spark/repl/ReplSuite.scala | 2 +- .../dstream/NetworkInputDStream.scala | 4 +- .../java/spark/streaming/JavaAPISuite.java | 2 +- .../streaming/BasicOperationsSuite.scala | 2 +- .../spark/streaming/CheckpointSuite.scala | 2 +- .../scala/spark/streaming/FailureSuite.scala | 2 +- .../spark/streaming/InputStreamsSuite.scala | 2 +- .../streaming/WindowOperationsSuite.scala | 2 +- 34 files changed, 248 insertions(+), 251 deletions(-) diff --git a/bagel/src/test/scala/bagel/BagelSuite.scala b/bagel/src/test/scala/bagel/BagelSuite.scala index ca59f46843..3c2f9c4616 100644 --- a/bagel/src/test/scala/bagel/BagelSuite.scala +++ b/bagel/src/test/scala/bagel/BagelSuite.scala @@ -23,7 +23,7 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter { sc = null } // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") + System.clearProperty("spark.driver.port") } test("halting by voting") { diff --git a/core/src/main/scala/spark/MapOutputTracker.scala b/core/src/main/scala/spark/MapOutputTracker.scala index ac02f3363a..d4f5164f7d 100644 --- a/core/src/main/scala/spark/MapOutputTracker.scala +++ b/core/src/main/scala/spark/MapOutputTracker.scala @@ -38,10 +38,7 @@ private[spark] class MapOutputTrackerActor(tracker: MapOutputTracker) extends Ac } } -private[spark] class MapOutputTracker(actorSystem: ActorSystem, isMaster: Boolean) extends Logging { - val ip: String = System.getProperty("spark.master.host", "localhost") - val port: Int = System.getProperty("spark.master.port", "7077").toInt - val actorName: String = "MapOutputTracker" +private[spark] class MapOutputTracker(actorSystem: ActorSystem, isDriver: Boolean) extends Logging { val timeout = 10.seconds @@ -56,11 +53,14 @@ private[spark] class MapOutputTracker(actorSystem: ActorSystem, isMaster: Boolea var cacheGeneration = generation val cachedSerializedStatuses = new TimeStampedHashMap[Int, Array[Byte]] - var trackerActor: ActorRef = if (isMaster) { + val actorName: String = "MapOutputTracker" + var trackerActor: ActorRef = if (isDriver) { val actor = 
actorSystem.actorOf(Props(new MapOutputTrackerActor(this)), name = actorName) logInfo("Registered MapOutputTrackerActor actor") actor } else { + val ip = System.getProperty("spark.driver.host", "localhost") + val port = System.getProperty("spark.driver.port", "7077").toInt val url = "akka://spark@%s:%s/user/%s".format(ip, port, actorName) actorSystem.actorFor(url) } diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index bc9fdee8b6..d4991cb1e0 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -66,20 +66,20 @@ class SparkContext( // Ensure logging is initialized before we spawn any threads initLogging() - // Set Spark master host and port system properties - if (System.getProperty("spark.master.host") == null) { - System.setProperty("spark.master.host", Utils.localIpAddress) + // Set Spark driver host and port system properties + if (System.getProperty("spark.driver.host") == null) { + System.setProperty("spark.driver.host", Utils.localIpAddress) } - if (System.getProperty("spark.master.port") == null) { - System.setProperty("spark.master.port", "0") + if (System.getProperty("spark.driver.port") == null) { + System.setProperty("spark.driver.port", "0") } private val isLocal = (master == "local" || master.startsWith("local[")) // Create the Spark execution environment (cache, map output tracker, etc) private[spark] val env = SparkEnv.createFromSystemProperties( - System.getProperty("spark.master.host"), - System.getProperty("spark.master.port").toInt, + System.getProperty("spark.driver.host"), + System.getProperty("spark.driver.port").toInt, true, isLocal) SparkEnv.set(env) @@ -396,14 +396,14 @@ class SparkContext( /** * Create an [[spark.Accumulator]] variable of a given type, which tasks can "add" values - * to using the `+=` method. Only the master can access the accumulator's `value`. + * to using the `+=` method. Only the driver can access the accumulator's `value`. */ def accumulator[T](initialValue: T)(implicit param: AccumulatorParam[T]) = new Accumulator(initialValue, param) /** * Create an [[spark.Accumulable]] shared variable, to which tasks can add values with `+=`. - * Only the master can access the accumuable's `value`. + * Only the driver can access the accumuable's `value`. * @tparam T accumulator type * @tparam R type that can be added to the accumulator */ @@ -530,7 +530,7 @@ class SparkContext( /** * Run a function on a given set of partitions in an RDD and return the results. This is the main * entry point to the scheduler, by which all actions get launched. The allowLocal flag specifies - * whether the scheduler can run the computation on the master rather than shipping it out to the + * whether the scheduler can run the computation on the driver rather than shipping it out to the * cluster, for short actions like first(). 
*/ def runJob[T, U: ClassManifest]( diff --git a/core/src/main/scala/spark/SparkEnv.scala b/core/src/main/scala/spark/SparkEnv.scala index 2a7a8af83d..4034af610c 100644 --- a/core/src/main/scala/spark/SparkEnv.scala +++ b/core/src/main/scala/spark/SparkEnv.scala @@ -60,15 +60,15 @@ object SparkEnv extends Logging { def createFromSystemProperties( hostname: String, port: Int, - isMaster: Boolean, + isDriver: Boolean, isLocal: Boolean ) : SparkEnv = { val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, port) - // Bit of a hack: If this is the master and our port was 0 (meaning bind to any free port), - // figure out which port number Akka actually bound to and set spark.master.port to it. - if (isMaster && port == 0) { - System.setProperty("spark.master.port", boundPort.toString) + // Bit of a hack: If this is the driver and our port was 0 (meaning bind to any free port), + // figure out which port number Akka actually bound to and set spark.driver.port to it. + if (isDriver && port == 0) { + System.setProperty("spark.driver.port", boundPort.toString) } val classLoader = Thread.currentThread.getContextClassLoader @@ -82,22 +82,22 @@ object SparkEnv extends Logging { val serializer = instantiateClass[Serializer]("spark.serializer", "spark.JavaSerializer") - val masterIp: String = System.getProperty("spark.master.host", "localhost") - val masterPort: Int = System.getProperty("spark.master.port", "7077").toInt + val driverIp: String = System.getProperty("spark.driver.host", "localhost") + val driverPort: Int = System.getProperty("spark.driver.port", "7077").toInt val blockManagerMaster = new BlockManagerMaster( - actorSystem, isMaster, isLocal, masterIp, masterPort) + actorSystem, isDriver, isLocal, driverIp, driverPort) val blockManager = new BlockManager(actorSystem, blockManagerMaster, serializer) val connectionManager = blockManager.connectionManager - val broadcastManager = new BroadcastManager(isMaster) + val broadcastManager = new BroadcastManager(isDriver) val closureSerializer = instantiateClass[Serializer]( "spark.closure.serializer", "spark.JavaSerializer") val cacheManager = new CacheManager(blockManager) - val mapOutputTracker = new MapOutputTracker(actorSystem, isMaster) + val mapOutputTracker = new MapOutputTracker(actorSystem, isDriver) val shuffleFetcher = instantiateClass[ShuffleFetcher]( "spark.shuffle.fetcher", "spark.BlockStoreShuffleFetcher") @@ -109,7 +109,7 @@ object SparkEnv extends Logging { // Set the sparkFiles directory, used when downloading dependencies. In local mode, // this is a temporary directory; in distributed mode, this is the executor's current working // directory. - val sparkFilesDir: String = if (isMaster) { + val sparkFilesDir: String = if (isDriver) { Utils.createTempDir().getAbsolutePath } else { "." 
diff --git a/core/src/main/scala/spark/broadcast/BitTorrentBroadcast.scala b/core/src/main/scala/spark/broadcast/BitTorrentBroadcast.scala index 386f505f2a..adcb2d2415 100644 --- a/core/src/main/scala/spark/broadcast/BitTorrentBroadcast.scala +++ b/core/src/main/scala/spark/broadcast/BitTorrentBroadcast.scala @@ -31,7 +31,7 @@ private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: @transient var totalBlocks = -1 @transient var hasBlocks = new AtomicInteger(0) - // Used ONLY by Master to track how many unique blocks have been sent out + // Used ONLY by driver to track how many unique blocks have been sent out @transient var sentBlocks = new AtomicInteger(0) @transient var listenPortLock = new Object @@ -42,7 +42,7 @@ private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: @transient var serveMR: ServeMultipleRequests = null - // Used only in Master + // Used only in driver @transient var guideMR: GuideMultipleRequests = null // Used only in Workers @@ -99,14 +99,14 @@ private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: } // Must always come AFTER listenPort is created - val masterSource = + val driverSource = SourceInfo(hostAddress, listenPort, totalBlocks, totalBytes) hasBlocksBitVector.synchronized { - masterSource.hasBlocksBitVector = hasBlocksBitVector + driverSource.hasBlocksBitVector = hasBlocksBitVector } // In the beginning, this is the only known source to Guide - listOfSources += masterSource + listOfSources += driverSource // Register with the Tracker MultiTracker.registerBroadcast(id, @@ -122,7 +122,7 @@ private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: case None => logInfo("Started reading broadcast variable " + id) - // Initializing everything because Master will only send null/0 values + // Initializing everything because driver will only send null/0 values // Only the 1st worker in a node can be here. Others will get from cache initializeWorkerVariables() @@ -151,7 +151,7 @@ private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: } } - // Initialize variables in the worker node. Master sends everything as 0/null + // Initialize variables in the worker node. Driver sends everything as 0/null private def initializeWorkerVariables() { arrayOfBlocks = null hasBlocksBitVector = null @@ -248,7 +248,7 @@ private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: // Receive source information from Guide var suitableSources = oisGuide.readObject.asInstanceOf[ListBuffer[SourceInfo]] - logDebug("Received suitableSources from Master " + suitableSources) + logDebug("Received suitableSources from Driver " + suitableSources) addToListOfSources(suitableSources) @@ -532,7 +532,7 @@ private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: oosSource.writeObject(blockToAskFor) oosSource.flush() - // CHANGED: Master might send some other block than the one + // CHANGED: Driver might send some other block than the one // requested to ensure fast spreading of all blocks. 
val recvStartTime = System.currentTimeMillis val bcBlock = oisSource.readObject.asInstanceOf[BroadcastBlock] @@ -982,9 +982,9 @@ private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: // Receive which block to send var blockToSend = ois.readObject.asInstanceOf[Int] - // If it is master AND at least one copy of each block has not been + // If it is driver AND at least one copy of each block has not been // sent out already, MODIFY blockToSend - if (MultiTracker.isMaster && sentBlocks.get < totalBlocks) { + if (MultiTracker.isDriver && sentBlocks.get < totalBlocks) { blockToSend = sentBlocks.getAndIncrement } @@ -1031,7 +1031,7 @@ private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: private[spark] class BitTorrentBroadcastFactory extends BroadcastFactory { - def initialize(isMaster: Boolean) { MultiTracker.initialize(isMaster) } + def initialize(isDriver: Boolean) { MultiTracker.initialize(isDriver) } def newBroadcast[T](value_ : T, isLocal: Boolean, id: Long) = new BitTorrentBroadcast[T](value_, isLocal, id) diff --git a/core/src/main/scala/spark/broadcast/Broadcast.scala b/core/src/main/scala/spark/broadcast/Broadcast.scala index 2ffe7f741d..415bde5d67 100644 --- a/core/src/main/scala/spark/broadcast/Broadcast.scala +++ b/core/src/main/scala/spark/broadcast/Broadcast.scala @@ -15,7 +15,7 @@ abstract class Broadcast[T](private[spark] val id: Long) extends Serializable { } private[spark] -class BroadcastManager(val isMaster_ : Boolean) extends Logging with Serializable { +class BroadcastManager(val _isDriver: Boolean) extends Logging with Serializable { private var initialized = false private var broadcastFactory: BroadcastFactory = null @@ -33,7 +33,7 @@ class BroadcastManager(val isMaster_ : Boolean) extends Logging with Serializabl Class.forName(broadcastFactoryClass).newInstance.asInstanceOf[BroadcastFactory] // Initialize appropriate BroadcastFactory and BroadcastObject - broadcastFactory.initialize(isMaster) + broadcastFactory.initialize(isDriver) initialized = true } @@ -49,5 +49,5 @@ class BroadcastManager(val isMaster_ : Boolean) extends Logging with Serializabl def newBroadcast[T](value_ : T, isLocal: Boolean) = broadcastFactory.newBroadcast[T](value_, isLocal, nextBroadcastId.getAndIncrement()) - def isMaster = isMaster_ + def isDriver = _isDriver } diff --git a/core/src/main/scala/spark/broadcast/BroadcastFactory.scala b/core/src/main/scala/spark/broadcast/BroadcastFactory.scala index ab6d302827..5c6184c3c7 100644 --- a/core/src/main/scala/spark/broadcast/BroadcastFactory.scala +++ b/core/src/main/scala/spark/broadcast/BroadcastFactory.scala @@ -7,7 +7,7 @@ package spark.broadcast * entire Spark job. 
*/ private[spark] trait BroadcastFactory { - def initialize(isMaster: Boolean): Unit - def newBroadcast[T](value_ : T, isLocal: Boolean, id: Long): Broadcast[T] + def initialize(isDriver: Boolean): Unit + def newBroadcast[T](value: T, isLocal: Boolean, id: Long): Broadcast[T] def stop(): Unit } diff --git a/core/src/main/scala/spark/broadcast/HttpBroadcast.scala b/core/src/main/scala/spark/broadcast/HttpBroadcast.scala index 8e490e6bad..7e30b8f7d2 100644 --- a/core/src/main/scala/spark/broadcast/HttpBroadcast.scala +++ b/core/src/main/scala/spark/broadcast/HttpBroadcast.scala @@ -48,7 +48,7 @@ extends Broadcast[T](id) with Logging with Serializable { } private[spark] class HttpBroadcastFactory extends BroadcastFactory { - def initialize(isMaster: Boolean) { HttpBroadcast.initialize(isMaster) } + def initialize(isDriver: Boolean) { HttpBroadcast.initialize(isDriver) } def newBroadcast[T](value_ : T, isLocal: Boolean, id: Long) = new HttpBroadcast[T](value_, isLocal, id) @@ -69,12 +69,12 @@ private object HttpBroadcast extends Logging { private val cleaner = new MetadataCleaner("HttpBroadcast", cleanup) - def initialize(isMaster: Boolean) { + def initialize(isDriver: Boolean) { synchronized { if (!initialized) { bufferSize = System.getProperty("spark.buffer.size", "65536").toInt compress = System.getProperty("spark.broadcast.compress", "true").toBoolean - if (isMaster) { + if (isDriver) { createServer() } serverUri = System.getProperty("spark.httpBroadcast.uri") diff --git a/core/src/main/scala/spark/broadcast/MultiTracker.scala b/core/src/main/scala/spark/broadcast/MultiTracker.scala index 5e76dedb94..3fd77af73f 100644 --- a/core/src/main/scala/spark/broadcast/MultiTracker.scala +++ b/core/src/main/scala/spark/broadcast/MultiTracker.scala @@ -23,25 +23,24 @@ extends Logging { var ranGen = new Random private var initialized = false - private var isMaster_ = false + private var _isDriver = false private var stopBroadcast = false private var trackMV: TrackMultipleValues = null - def initialize(isMaster__ : Boolean) { + def initialize(__isDriver: Boolean) { synchronized { if (!initialized) { + _isDriver = __isDriver - isMaster_ = isMaster__ - - if (isMaster) { + if (isDriver) { trackMV = new TrackMultipleValues trackMV.setDaemon(true) trackMV.start() - // Set masterHostAddress to the master's IP address for the slaves to read - System.setProperty("spark.MultiTracker.MasterHostAddress", Utils.localIpAddress) + // Set DriverHostAddress to the driver's IP address for the slaves to read + System.setProperty("spark.MultiTracker.DriverHostAddress", Utils.localIpAddress) } initialized = true @@ -54,10 +53,10 @@ extends Logging { } // Load common parameters - private var MasterHostAddress_ = System.getProperty( - "spark.MultiTracker.MasterHostAddress", "") - private var MasterTrackerPort_ = System.getProperty( - "spark.broadcast.masterTrackerPort", "11111").toInt + private var DriverHostAddress_ = System.getProperty( + "spark.MultiTracker.DriverHostAddress", "") + private var DriverTrackerPort_ = System.getProperty( + "spark.broadcast.driverTrackerPort", "11111").toInt private var BlockSize_ = System.getProperty( "spark.broadcast.blockSize", "4096").toInt * 1024 private var MaxRetryCount_ = System.getProperty( @@ -91,11 +90,11 @@ extends Logging { private var EndGameFraction_ = System.getProperty( "spark.broadcast.endGameFraction", "0.95").toDouble - def isMaster = isMaster_ + def isDriver = _isDriver // Common config params - def MasterHostAddress = MasterHostAddress_ - def MasterTrackerPort = 
MasterTrackerPort_ + def DriverHostAddress = DriverHostAddress_ + def DriverTrackerPort = DriverTrackerPort_ def BlockSize = BlockSize_ def MaxRetryCount = MaxRetryCount_ @@ -123,7 +122,7 @@ extends Logging { var threadPool = Utils.newDaemonCachedThreadPool() var serverSocket: ServerSocket = null - serverSocket = new ServerSocket(MasterTrackerPort) + serverSocket = new ServerSocket(DriverTrackerPort) logInfo("TrackMultipleValues started at " + serverSocket) try { @@ -235,7 +234,7 @@ extends Logging { try { // Connect to the tracker to find out GuideInfo clientSocketToTracker = - new Socket(MultiTracker.MasterHostAddress, MultiTracker.MasterTrackerPort) + new Socket(MultiTracker.DriverHostAddress, MultiTracker.DriverTrackerPort) oosTracker = new ObjectOutputStream(clientSocketToTracker.getOutputStream) oosTracker.flush() @@ -276,7 +275,7 @@ extends Logging { } def registerBroadcast(id: Long, gInfo: SourceInfo) { - val socket = new Socket(MultiTracker.MasterHostAddress, MasterTrackerPort) + val socket = new Socket(MultiTracker.DriverHostAddress, DriverTrackerPort) val oosST = new ObjectOutputStream(socket.getOutputStream) oosST.flush() val oisST = new ObjectInputStream(socket.getInputStream) @@ -303,7 +302,7 @@ extends Logging { } def unregisterBroadcast(id: Long) { - val socket = new Socket(MultiTracker.MasterHostAddress, MasterTrackerPort) + val socket = new Socket(MultiTracker.DriverHostAddress, DriverTrackerPort) val oosST = new ObjectOutputStream(socket.getOutputStream) oosST.flush() val oisST = new ObjectInputStream(socket.getInputStream) diff --git a/core/src/main/scala/spark/broadcast/TreeBroadcast.scala b/core/src/main/scala/spark/broadcast/TreeBroadcast.scala index f573512835..c55c476117 100644 --- a/core/src/main/scala/spark/broadcast/TreeBroadcast.scala +++ b/core/src/main/scala/spark/broadcast/TreeBroadcast.scala @@ -98,7 +98,7 @@ extends Broadcast[T](id) with Logging with Serializable { case None => logInfo("Started reading broadcast variable " + id) - // Initializing everything because Master will only send null/0 values + // Initializing everything because Driver will only send null/0 values // Only the 1st worker in a node can be here. 
Others will get from cache initializeWorkerVariables() @@ -157,55 +157,55 @@ extends Broadcast[T](id) with Logging with Serializable { listenPortLock.synchronized { listenPortLock.wait() } } - var clientSocketToMaster: Socket = null - var oosMaster: ObjectOutputStream = null - var oisMaster: ObjectInputStream = null + var clientSocketToDriver: Socket = null + var oosDriver: ObjectOutputStream = null + var oisDriver: ObjectInputStream = null // Connect and receive broadcast from the specified source, retrying the // specified number of times in case of failures var retriesLeft = MultiTracker.MaxRetryCount do { - // Connect to Master and send this worker's Information - clientSocketToMaster = new Socket(MultiTracker.MasterHostAddress, gInfo.listenPort) - oosMaster = new ObjectOutputStream(clientSocketToMaster.getOutputStream) - oosMaster.flush() - oisMaster = new ObjectInputStream(clientSocketToMaster.getInputStream) + // Connect to Driver and send this worker's Information + clientSocketToDriver = new Socket(MultiTracker.DriverHostAddress, gInfo.listenPort) + oosDriver = new ObjectOutputStream(clientSocketToDriver.getOutputStream) + oosDriver.flush() + oisDriver = new ObjectInputStream(clientSocketToDriver.getInputStream) - logDebug("Connected to Master's guiding object") + logDebug("Connected to Driver's guiding object") // Send local source information - oosMaster.writeObject(SourceInfo(hostAddress, listenPort)) - oosMaster.flush() + oosDriver.writeObject(SourceInfo(hostAddress, listenPort)) + oosDriver.flush() - // Receive source information from Master - var sourceInfo = oisMaster.readObject.asInstanceOf[SourceInfo] + // Receive source information from Driver + var sourceInfo = oisDriver.readObject.asInstanceOf[SourceInfo] totalBlocks = sourceInfo.totalBlocks arrayOfBlocks = new Array[BroadcastBlock](totalBlocks) totalBlocksLock.synchronized { totalBlocksLock.notifyAll() } totalBytes = sourceInfo.totalBytes - logDebug("Received SourceInfo from Master:" + sourceInfo + " My Port: " + listenPort) + logDebug("Received SourceInfo from Driver:" + sourceInfo + " My Port: " + listenPort) val start = System.nanoTime val receptionSucceeded = receiveSingleTransmission(sourceInfo) val time = (System.nanoTime - start) / 1e9 - // Updating some statistics in sourceInfo. Master will be using them later + // Updating some statistics in sourceInfo. 
Driver will be using them later if (!receptionSucceeded) { sourceInfo.receptionFailed = true } - // Send back statistics to the Master - oosMaster.writeObject(sourceInfo) + // Send back statistics to the Driver + oosDriver.writeObject(sourceInfo) - if (oisMaster != null) { - oisMaster.close() + if (oisDriver != null) { + oisDriver.close() } - if (oosMaster != null) { - oosMaster.close() + if (oosDriver != null) { + oosDriver.close() } - if (clientSocketToMaster != null) { - clientSocketToMaster.close() + if (clientSocketToDriver != null) { + clientSocketToDriver.close() } retriesLeft -= 1 @@ -552,7 +552,7 @@ extends Broadcast[T](id) with Logging with Serializable { } private def sendObject() { - // Wait till receiving the SourceInfo from Master + // Wait till receiving the SourceInfo from Driver while (totalBlocks == -1) { totalBlocksLock.synchronized { totalBlocksLock.wait() } } @@ -576,7 +576,7 @@ extends Broadcast[T](id) with Logging with Serializable { private[spark] class TreeBroadcastFactory extends BroadcastFactory { - def initialize(isMaster: Boolean) { MultiTracker.initialize(isMaster) } + def initialize(isDriver: Boolean) { MultiTracker.initialize(isDriver) } def newBroadcast[T](value_ : T, isLocal: Boolean, id: Long) = new TreeBroadcast[T](value_, isLocal, id) diff --git a/core/src/main/scala/spark/deploy/LocalSparkCluster.scala b/core/src/main/scala/spark/deploy/LocalSparkCluster.scala index 4211d80596..ae083efc8d 100644 --- a/core/src/main/scala/spark/deploy/LocalSparkCluster.scala +++ b/core/src/main/scala/spark/deploy/LocalSparkCluster.scala @@ -10,7 +10,7 @@ import spark.{Logging, Utils} import scala.collection.mutable.ArrayBuffer private[spark] -class LocalSparkCluster(numSlaves: Int, coresPerSlave: Int, memoryPerSlave: Int) extends Logging { +class LocalSparkCluster(numWorkers: Int, coresPerWorker: Int, memoryPerWorker: Int) extends Logging { val localIpAddress = Utils.localIpAddress @@ -19,33 +19,31 @@ class LocalSparkCluster(numSlaves: Int, coresPerSlave: Int, memoryPerSlave: Int) var masterPort : Int = _ var masterUrl : String = _ - val slaveActorSystems = ArrayBuffer[ActorSystem]() - val slaveActors = ArrayBuffer[ActorRef]() + val workerActorSystems = ArrayBuffer[ActorSystem]() + val workerActors = ArrayBuffer[ActorRef]() def start() : String = { - logInfo("Starting a local Spark cluster with " + numSlaves + " slaves.") + logInfo("Starting a local Spark cluster with " + numWorkers + " workers.") /* Start the Master */ val (actorSystem, masterPort) = AkkaUtils.createActorSystem("sparkMaster", localIpAddress, 0) masterActorSystem = actorSystem masterUrl = "spark://" + localIpAddress + ":" + masterPort - val actor = masterActorSystem.actorOf( + masterActor = masterActorSystem.actorOf( Props(new Master(localIpAddress, masterPort, 0)), name = "Master") - masterActor = actor - /* Start the Slaves */ - for (slaveNum <- 1 to numSlaves) { - /* We can pretend to test distributed stuff by giving the slaves distinct hostnames. + /* Start the Workers */ + for (workerNum <- 1 to numWorkers) { + /* We can pretend to test distributed stuff by giving the workers distinct hostnames. All of 127/8 should be a loopback, we use 127.100.*.* in hopes that it is sufficiently distinctive. */ - val slaveIpAddress = "127.100.0." + (slaveNum % 256) + val workerIpAddress = "127.100.0." 
+ (workerNum % 256) val (actorSystem, boundPort) = - AkkaUtils.createActorSystem("sparkWorker" + slaveNum, slaveIpAddress, 0) - slaveActorSystems += actorSystem - val actor = actorSystem.actorOf( - Props(new Worker(slaveIpAddress, boundPort, 0, coresPerSlave, memoryPerSlave, masterUrl)), + AkkaUtils.createActorSystem("sparkWorker" + workerNum, workerIpAddress, 0) + workerActorSystems += actorSystem + workerActors += actorSystem.actorOf( + Props(new Worker(workerIpAddress, boundPort, 0, coresPerWorker, memoryPerWorker, masterUrl)), name = "Worker") - slaveActors += actor } return masterUrl @@ -53,9 +51,9 @@ class LocalSparkCluster(numSlaves: Int, coresPerSlave: Int, memoryPerSlave: Int) def stop() { logInfo("Shutting down local Spark cluster.") - // Stop the slaves before the master so they don't get upset that it disconnected - slaveActorSystems.foreach(_.shutdown()) - slaveActorSystems.foreach(_.awaitTermination()) + // Stop the workers before the master so they don't get upset that it disconnected + workerActorSystems.foreach(_.shutdown()) + workerActorSystems.foreach(_.awaitTermination()) masterActorSystem.shutdown() masterActorSystem.awaitTermination() } diff --git a/core/src/main/scala/spark/deploy/client/ClientListener.scala b/core/src/main/scala/spark/deploy/client/ClientListener.scala index da6abcc9c2..7035f4b394 100644 --- a/core/src/main/scala/spark/deploy/client/ClientListener.scala +++ b/core/src/main/scala/spark/deploy/client/ClientListener.scala @@ -12,7 +12,7 @@ private[spark] trait ClientListener { def disconnected(): Unit - def executorAdded(id: String, workerId: String, host: String, cores: Int, memory: Int): Unit + def executorAdded(fullId: String, workerId: String, host: String, cores: Int, memory: Int): Unit - def executorRemoved(id: String, message: String, exitStatus: Option[Int]): Unit + def executorRemoved(fullId: String, message: String, exitStatus: Option[Int]): Unit } diff --git a/core/src/main/scala/spark/deploy/master/JobInfo.scala b/core/src/main/scala/spark/deploy/master/JobInfo.scala index 130b031a2a..a274b21c34 100644 --- a/core/src/main/scala/spark/deploy/master/JobInfo.scala +++ b/core/src/main/scala/spark/deploy/master/JobInfo.scala @@ -10,7 +10,7 @@ private[spark] class JobInfo( val id: String, val desc: JobDescription, val submitDate: Date, - val actor: ActorRef) + val driver: ActorRef) { var state = JobState.WAITING var executors = new mutable.HashMap[Int, ExecutorInfo] diff --git a/core/src/main/scala/spark/deploy/master/Master.scala b/core/src/main/scala/spark/deploy/master/Master.scala index 2c2cd0231b..3347207c6d 100644 --- a/core/src/main/scala/spark/deploy/master/Master.scala +++ b/core/src/main/scala/spark/deploy/master/Master.scala @@ -88,7 +88,7 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor execOption match { case Some(exec) => { exec.state = state - exec.job.actor ! ExecutorUpdated(execId, state, message, exitStatus) + exec.job.driver ! ExecutorUpdated(execId, state, message, exitStatus) if (ExecutorState.isFinished(state)) { val jobInfo = idToJob(jobId) // Remove this executor from the worker and job @@ -199,7 +199,7 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor logInfo("Launching executor " + exec.fullId + " on worker " + worker.id) worker.addExecutor(exec) worker.actor ! LaunchExecutor(exec.job.id, exec.id, exec.job.desc, exec.cores, exec.memory, sparkHome) - exec.job.actor ! 
ExecutorAdded(exec.id, worker.id, worker.host, exec.cores, exec.memory) + exec.job.driver ! ExecutorAdded(exec.id, worker.id, worker.host, exec.cores, exec.memory) } def addWorker(id: String, host: String, port: Int, cores: Int, memory: Int, webUiPort: Int, @@ -221,19 +221,19 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor actorToWorker -= worker.actor addressToWorker -= worker.actor.path.address for (exec <- worker.executors.values) { - exec.job.actor ! ExecutorStateChanged(exec.job.id, exec.id, ExecutorState.LOST, None, None) + exec.job.driver ! ExecutorStateChanged(exec.job.id, exec.id, ExecutorState.LOST, None, None) exec.job.executors -= exec.id } } - def addJob(desc: JobDescription, actor: ActorRef): JobInfo = { + def addJob(desc: JobDescription, driver: ActorRef): JobInfo = { val now = System.currentTimeMillis() val date = new Date(now) - val job = new JobInfo(now, newJobId(date), desc, date, actor) + val job = new JobInfo(now, newJobId(date), desc, date, driver) jobs += job idToJob(job.id) = job - actorToJob(sender) = job - addressToJob(sender.path.address) = job + actorToJob(driver) = job + addressToJob(driver.path.address) = job return job } @@ -242,8 +242,8 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor logInfo("Removing job " + job.id) jobs -= job idToJob -= job.id - actorToJob -= job.actor - addressToWorker -= job.actor.path.address + actorToJob -= job.driver + addressToWorker -= job.driver.path.address completedJobs += job // Remember it in our history waitingJobs -= job for (exec <- job.executors.values) { diff --git a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala index a29bf974d2..f80f1b5274 100644 --- a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala +++ b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala @@ -16,33 +16,33 @@ import spark.scheduler.cluster.RegisterSlave private[spark] class StandaloneExecutorBackend( executor: Executor, - masterUrl: String, - slaveId: String, + driverUrl: String, + workerId: String, hostname: String, cores: Int) extends Actor with ExecutorBackend with Logging { - var master: ActorRef = null + var driver: ActorRef = null override def preStart() { try { - logInfo("Connecting to master: " + masterUrl) - master = context.actorFor(masterUrl) - master ! RegisterSlave(slaveId, hostname, cores) + logInfo("Connecting to driver: " + driverUrl) + driver = context.actorFor(driverUrl) + driver ! RegisterSlave(workerId, hostname, cores) context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) - context.watch(master) // Doesn't work with remote actors, but useful for testing + context.watch(driver) // Doesn't work with remote actors, but useful for testing } catch { case e: Exception => - logError("Failed to connect to master", e) + logError("Failed to connect to driver", e) System.exit(1) } } override def receive = { case RegisteredSlave(sparkProperties) => - logInfo("Successfully registered with master") + logInfo("Successfully registered with driver") executor.initialize(hostname, sparkProperties) case RegisterSlaveFailed(message) => @@ -55,24 +55,24 @@ private[spark] class StandaloneExecutorBackend( } override def statusUpdate(taskId: Long, state: TaskState, data: ByteBuffer) { - master ! StatusUpdate(slaveId, taskId, state, data) + driver ! 
StatusUpdate(workerId, taskId, state, data) } } private[spark] object StandaloneExecutorBackend { - def run(masterUrl: String, slaveId: String, hostname: String, cores: Int) { + def run(driverUrl: String, workerId: String, hostname: String, cores: Int) { // Create a new ActorSystem to run the backend, because we can't create a SparkEnv / Executor // before getting started with all our system properties, etc val (actorSystem, boundPort) = AkkaUtils.createActorSystem("sparkExecutor", hostname, 0) val actor = actorSystem.actorOf( - Props(new StandaloneExecutorBackend(new Executor, masterUrl, slaveId, hostname, cores)), + Props(new StandaloneExecutorBackend(new Executor, driverUrl, workerId, hostname, cores)), name = "Executor") actorSystem.awaitTermination() } def main(args: Array[String]) { if (args.length != 4) { - System.err.println("Usage: StandaloneExecutorBackend ") + System.err.println("Usage: StandaloneExecutorBackend ") System.exit(1) } run(args(0), args(1), args(2), args(3).toInt) diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index 4f82cd96dd..866beb6d01 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -19,7 +19,7 @@ private[spark] class SparkDeploySchedulerBackend( var shutdownCallback : (SparkDeploySchedulerBackend) => Unit = _ val maxCores = System.getProperty("spark.cores.max", Int.MaxValue.toString).toInt - val executorIdToSlaveId = new HashMap[String, String] + val executorIdToWorkerId = new HashMap[String, String] // Memory used by each executor (in megabytes) val executorMemory = { @@ -34,10 +34,11 @@ private[spark] class SparkDeploySchedulerBackend( override def start() { super.start() - val masterUrl = "akka://spark@%s:%s/user/%s".format( - System.getProperty("spark.master.host"), System.getProperty("spark.master.port"), + // The endpoint for executors to talk to us + val driverUrl = "akka://spark@%s:%s/user/%s".format( + System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"), StandaloneSchedulerBackend.ACTOR_NAME) - val args = Seq(masterUrl, "{{SLAVEID}}", "{{HOSTNAME}}", "{{CORES}}") + val args = Seq(driverUrl, "{{SLAVEID}}", "{{HOSTNAME}}", "{{CORES}}") val command = Command("spark.executor.StandaloneExecutorBackend", args, sc.executorEnvs) val sparkHome = sc.getSparkHome().getOrElse(throw new IllegalArgumentException("must supply spark home for spark standalone")) val jobDesc = new JobDescription(jobName, maxCores, executorMemory, command, sparkHome) @@ -55,35 +56,35 @@ private[spark] class SparkDeploySchedulerBackend( } } - def connected(jobId: String) { + override def connected(jobId: String) { logInfo("Connected to Spark cluster with job ID " + jobId) } - def disconnected() { + override def disconnected() { if (!stopping) { logError("Disconnected from Spark cluster!") scheduler.error("Disconnected from Spark cluster") } } - def executorAdded(id: String, workerId: String, host: String, cores: Int, memory: Int) { - executorIdToSlaveId += id -> workerId + override def executorAdded(fullId: String, workerId: String, host: String, cores: Int, memory: Int) { + executorIdToWorkerId += fullId -> workerId logInfo("Granted executor ID %s on host %s with %d cores, %s RAM".format( - id, host, cores, Utils.memoryMegabytesToString(memory))) + fullId, host, cores, Utils.memoryMegabytesToString(memory))) } 
- def executorRemoved(id: String, message: String, exitStatus: Option[Int]) { + override def executorRemoved(fullId: String, message: String, exitStatus: Option[Int]) { val reason: ExecutorLossReason = exitStatus match { case Some(code) => ExecutorExited(code) case None => SlaveLost(message) } - logInfo("Executor %s removed: %s".format(id, message)) - executorIdToSlaveId.get(id) match { - case Some(slaveId) => - executorIdToSlaveId.remove(id) - scheduler.slaveLost(slaveId, reason) + logInfo("Executor %s removed: %s".format(fullId, message)) + executorIdToWorkerId.get(fullId) match { + case Some(workerId) => + executorIdToWorkerId.remove(fullId) + scheduler.slaveLost(workerId, reason) case None => - logInfo("No slave ID known for executor %s".format(id)) + logInfo("No worker ID known for executor %s".format(fullId)) } } } diff --git a/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala b/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala index 1386cd9d44..bea9dc4f23 100644 --- a/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala +++ b/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala @@ -6,7 +6,7 @@ import spark.util.SerializableBuffer private[spark] sealed trait StandaloneClusterMessage extends Serializable -// Master to slaves +// Driver to executors private[spark] case class LaunchTask(task: TaskDescription) extends StandaloneClusterMessage @@ -16,7 +16,7 @@ case class RegisteredSlave(sparkProperties: Seq[(String, String)]) extends Stand private[spark] case class RegisterSlaveFailed(message: String) extends StandaloneClusterMessage -// Slaves to master +// Executors to driver private[spark] case class RegisterSlave(slaveId: String, host: String, cores: Int) extends StandaloneClusterMessage @@ -32,6 +32,6 @@ object StatusUpdate { } } -// Internal messages in master +// Internal messages in driver private[spark] case object ReviveOffers extends StandaloneClusterMessage -private[spark] case object StopMaster extends StandaloneClusterMessage +private[spark] case object StopDriver extends StandaloneClusterMessage diff --git a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index eeaae23dc8..d742a7b2bf 100644 --- a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -23,7 +23,7 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor // Use an atomic variable to track total number of cores in the cluster for simplicity and speed var totalCoreCount = new AtomicInteger(0) - class MasterActor(sparkProperties: Seq[(String, String)]) extends Actor { + class DriverActor(sparkProperties: Seq[(String, String)]) extends Actor { val slaveActor = new HashMap[String, ActorRef] val slaveAddress = new HashMap[String, Address] val slaveHost = new HashMap[String, String] @@ -37,34 +37,34 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor } def receive = { - case RegisterSlave(slaveId, host, cores) => - if (slaveActor.contains(slaveId)) { - sender ! RegisterSlaveFailed("Duplicate slave ID: " + slaveId) + case RegisterSlave(workerId, host, cores) => + if (slaveActor.contains(workerId)) { + sender ! 
RegisterSlaveFailed("Duplicate slave ID: " + workerId) } else { - logInfo("Registered slave: " + sender + " with ID " + slaveId) + logInfo("Registered slave: " + sender + " with ID " + workerId) sender ! RegisteredSlave(sparkProperties) context.watch(sender) - slaveActor(slaveId) = sender - slaveHost(slaveId) = host - freeCores(slaveId) = cores - slaveAddress(slaveId) = sender.path.address - actorToSlaveId(sender) = slaveId - addressToSlaveId(sender.path.address) = slaveId + slaveActor(workerId) = sender + slaveHost(workerId) = host + freeCores(workerId) = cores + slaveAddress(workerId) = sender.path.address + actorToSlaveId(sender) = workerId + addressToSlaveId(sender.path.address) = workerId totalCoreCount.addAndGet(cores) makeOffers() } - case StatusUpdate(slaveId, taskId, state, data) => + case StatusUpdate(workerId, taskId, state, data) => scheduler.statusUpdate(taskId, state, data.value) if (TaskState.isFinished(state)) { - freeCores(slaveId) += 1 - makeOffers(slaveId) + freeCores(workerId) += 1 + makeOffers(workerId) } case ReviveOffers => makeOffers() - case StopMaster => + case StopDriver => sender ! true context.stop(self) @@ -85,9 +85,9 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor } // Make fake resource offers on just one slave - def makeOffers(slaveId: String) { + def makeOffers(workerId: String) { launchTasks(scheduler.resourceOffers( - Seq(new WorkerOffer(slaveId, slaveHost(slaveId), freeCores(slaveId))))) + Seq(new WorkerOffer(workerId, slaveHost(workerId), freeCores(workerId))))) } // Launch tasks returned by a set of resource offers @@ -99,24 +99,24 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor } // Remove a disconnected slave from the cluster - def removeSlave(slaveId: String, reason: String) { - logInfo("Slave " + slaveId + " disconnected, so removing it") - val numCores = freeCores(slaveId) - actorToSlaveId -= slaveActor(slaveId) - addressToSlaveId -= slaveAddress(slaveId) - slaveActor -= slaveId - slaveHost -= slaveId - freeCores -= slaveId - slaveHost -= slaveId + def removeSlave(workerId: String, reason: String) { + logInfo("Slave " + workerId + " disconnected, so removing it") + val numCores = freeCores(workerId) + actorToSlaveId -= slaveActor(workerId) + addressToSlaveId -= slaveAddress(workerId) + slaveActor -= workerId + slaveHost -= workerId + freeCores -= workerId + slaveHost -= workerId totalCoreCount.addAndGet(-numCores) - scheduler.slaveLost(slaveId, SlaveLost(reason)) + scheduler.slaveLost(workerId, SlaveLost(reason)) } } - var masterActor: ActorRef = null + var driverActor: ActorRef = null val taskIdsOnSlave = new HashMap[String, HashSet[String]] - def start() { + override def start() { val properties = new ArrayBuffer[(String, String)] val iterator = System.getProperties.entrySet.iterator while (iterator.hasNext) { @@ -126,15 +126,15 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor properties += ((key, value)) } } - masterActor = actorSystem.actorOf( - Props(new MasterActor(properties)), name = StandaloneSchedulerBackend.ACTOR_NAME) + driverActor = actorSystem.actorOf( + Props(new DriverActor(properties)), name = StandaloneSchedulerBackend.ACTOR_NAME) } - def stop() { + override def stop() { try { - if (masterActor != null) { + if (driverActor != null) { val timeout = 5.seconds - val future = masterActor.ask(StopMaster)(timeout) + val future = driverActor.ask(StopDriver)(timeout) Await.result(future, timeout) } } catch { @@ -143,11 +143,11 @@ 
class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor } } - def reviveOffers() { - masterActor ! ReviveOffers + override def reviveOffers() { + driverActor ! ReviveOffers } - def defaultParallelism(): Int = math.max(totalCoreCount.get(), 2) + override def defaultParallelism(): Int = math.max(totalCoreCount.get(), 2) } private[spark] object StandaloneSchedulerBackend { diff --git a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala index 014906b028..7bf56a05d6 100644 --- a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala @@ -104,11 +104,11 @@ private[spark] class CoarseMesosSchedulerBackend( def createCommand(offer: Offer, numCores: Int): CommandInfo = { val runScript = new File(sparkHome, "run").getCanonicalPath - val masterUrl = "akka://spark@%s:%s/user/%s".format( - System.getProperty("spark.master.host"), System.getProperty("spark.master.port"), + val driverUrl = "akka://spark@%s:%s/user/%s".format( + System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"), StandaloneSchedulerBackend.ACTOR_NAME) val command = "\"%s\" spark.executor.StandaloneExecutorBackend %s %s %s %d".format( - runScript, masterUrl, offer.getSlaveId.getValue, offer.getHostname, numCores) + runScript, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores) val environment = Environment.newBuilder() sc.executorEnvs.foreach { case (key, value) => environment.addVariables(Environment.Variable.newBuilder() diff --git a/core/src/main/scala/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/spark/storage/BlockManagerMaster.scala index a3d8671834..9fd2b454a4 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMaster.scala @@ -11,52 +11,51 @@ import akka.util.duration._ import spark.{Logging, SparkException, Utils} - private[spark] class BlockManagerMaster( val actorSystem: ActorSystem, - isMaster: Boolean, + isDriver: Boolean, isLocal: Boolean, - masterIp: String, - masterPort: Int) + driverIp: String, + driverPort: Int) extends Logging { val AKKA_RETRY_ATTEMPS: Int = System.getProperty("spark.akka.num.retries", "3").toInt val AKKA_RETRY_INTERVAL_MS: Int = System.getProperty("spark.akka.retry.wait", "3000").toInt - val MASTER_AKKA_ACTOR_NAME = "BlockMasterManager" + val DRIVER_AKKA_ACTOR_NAME = "BlockMasterManager" val SLAVE_AKKA_ACTOR_NAME = "BlockSlaveManager" val DEFAULT_MANAGER_IP: String = Utils.localHostName() val timeout = 10.seconds - var masterActor: ActorRef = { - if (isMaster) { - val masterActor = actorSystem.actorOf(Props(new BlockManagerMasterActor(isLocal)), - name = MASTER_AKKA_ACTOR_NAME) + var driverActor: ActorRef = { + if (isDriver) { + val driverActor = actorSystem.actorOf(Props(new BlockManagerMasterActor(isLocal)), + name = DRIVER_AKKA_ACTOR_NAME) logInfo("Registered BlockManagerMaster Actor") - masterActor + driverActor } else { - val url = "akka://spark@%s:%s/user/%s".format(masterIp, masterPort, MASTER_AKKA_ACTOR_NAME) + val url = "akka://spark@%s:%s/user/%s".format(driverIp, driverPort, DRIVER_AKKA_ACTOR_NAME) logInfo("Connecting to BlockManagerMaster: " + url) actorSystem.actorFor(url) } } - /** Remove a dead host from the master actor. This is only called on the master side. */ + /** Remove a dead host from the driver actor. 
This is only called on the driver side. */ def notifyADeadHost(host: String) { tell(RemoveHost(host)) logInfo("Removed " + host + " successfully in notifyADeadHost") } /** - * Send the master actor a heart beat from the slave. Returns true if everything works out, - * false if the master does not know about the given block manager, which means the block + * Send the driver actor a heart beat from the slave. Returns true if everything works out, + * false if the driver does not know about the given block manager, which means the block * manager should re-register. */ def sendHeartBeat(blockManagerId: BlockManagerId): Boolean = { - askMasterWithRetry[Boolean](HeartBeat(blockManagerId)) + askDriverWithReply[Boolean](HeartBeat(blockManagerId)) } - /** Register the BlockManager's id with the master. */ + /** Register the BlockManager's id with the driver. */ def registerBlockManager( blockManagerId: BlockManagerId, maxMemSize: Long, slaveActor: ActorRef) { logInfo("Trying to register BlockManager") @@ -70,25 +69,25 @@ private[spark] class BlockManagerMaster( storageLevel: StorageLevel, memSize: Long, diskSize: Long): Boolean = { - val res = askMasterWithRetry[Boolean]( + val res = askDriverWithReply[Boolean]( UpdateBlockInfo(blockManagerId, blockId, storageLevel, memSize, diskSize)) logInfo("Updated info of block " + blockId) res } - /** Get locations of the blockId from the master */ + /** Get locations of the blockId from the driver */ def getLocations(blockId: String): Seq[BlockManagerId] = { - askMasterWithRetry[Seq[BlockManagerId]](GetLocations(blockId)) + askDriverWithReply[Seq[BlockManagerId]](GetLocations(blockId)) } - /** Get locations of multiple blockIds from the master */ + /** Get locations of multiple blockIds from the driver */ def getLocations(blockIds: Array[String]): Seq[Seq[BlockManagerId]] = { - askMasterWithRetry[Seq[Seq[BlockManagerId]]](GetLocationsMultipleBlockIds(blockIds)) + askDriverWithReply[Seq[Seq[BlockManagerId]]](GetLocationsMultipleBlockIds(blockIds)) } - /** Get ids of other nodes in the cluster from the master */ + /** Get ids of other nodes in the cluster from the driver */ def getPeers(blockManagerId: BlockManagerId, numPeers: Int): Seq[BlockManagerId] = { - val result = askMasterWithRetry[Seq[BlockManagerId]](GetPeers(blockManagerId, numPeers)) + val result = askDriverWithReply[Seq[BlockManagerId]](GetPeers(blockManagerId, numPeers)) if (result.length != numPeers) { throw new SparkException( "Error getting peers, only got " + result.size + " instead of " + numPeers) @@ -98,10 +97,10 @@ private[spark] class BlockManagerMaster( /** * Remove a block from the slaves that have it. This can only be used to remove - * blocks that the master knows about. + * blocks that the driver knows about. */ def removeBlock(blockId: String) { - askMasterWithRetry(RemoveBlock(blockId)) + askDriverWithReply(RemoveBlock(blockId)) } /** @@ -111,33 +110,33 @@ private[spark] class BlockManagerMaster( * amount of remaining memory. 
*/ def getMemoryStatus: Map[BlockManagerId, (Long, Long)] = { - askMasterWithRetry[Map[BlockManagerId, (Long, Long)]](GetMemoryStatus) + askDriverWithReply[Map[BlockManagerId, (Long, Long)]](GetMemoryStatus) } - /** Stop the master actor, called only on the Spark master node */ + /** Stop the driver actor, called only on the Spark driver node */ def stop() { - if (masterActor != null) { + if (driverActor != null) { tell(StopBlockManagerMaster) - masterActor = null + driverActor = null logInfo("BlockManagerMaster stopped") } } /** Send a one-way message to the master actor, to which we expect it to reply with true. */ private def tell(message: Any) { - if (!askMasterWithRetry[Boolean](message)) { + if (!askDriverWithReply[Boolean](message)) { throw new SparkException("BlockManagerMasterActor returned false, expected true.") } } /** - * Send a message to the master actor and get its result within a default timeout, or + * Send a message to the driver actor and get its result within a default timeout, or * throw a SparkException if this fails. */ - private def askMasterWithRetry[T](message: Any): T = { + private def askDriverWithReply[T](message: Any): T = { // TODO: Consider removing multiple attempts - if (masterActor == null) { - throw new SparkException("Error sending message to BlockManager as masterActor is null " + + if (driverActor == null) { + throw new SparkException("Error sending message to BlockManager as driverActor is null " + "[message = " + message + "]") } var attempts = 0 @@ -145,7 +144,7 @@ private[spark] class BlockManagerMaster( while (attempts < AKKA_RETRY_ATTEMPS) { attempts += 1 try { - val future = masterActor.ask(message)(timeout) + val future = driverActor.ask(message)(timeout) val result = Await.result(future, timeout) if (result == null) { throw new Exception("BlockManagerMaster returned null") diff --git a/core/src/main/scala/spark/storage/ThreadingTest.scala b/core/src/main/scala/spark/storage/ThreadingTest.scala index 689f07b969..0b8f6d4303 100644 --- a/core/src/main/scala/spark/storage/ThreadingTest.scala +++ b/core/src/main/scala/spark/storage/ThreadingTest.scala @@ -75,9 +75,9 @@ private[spark] object ThreadingTest { System.setProperty("spark.kryoserializer.buffer.mb", "1") val actorSystem = ActorSystem("test") val serializer = new KryoSerializer - val masterIp: String = System.getProperty("spark.master.host", "localhost") - val masterPort: Int = System.getProperty("spark.master.port", "7077").toInt - val blockManagerMaster = new BlockManagerMaster(actorSystem, true, true, masterIp, masterPort) + val driverIp: String = System.getProperty("spark.driver.host", "localhost") + val driverPort: Int = System.getProperty("spark.driver.port", "7077").toInt + val blockManagerMaster = new BlockManagerMaster(actorSystem, true, true, driverIp, driverPort) val blockManager = new BlockManager(actorSystem, blockManagerMaster, serializer, 1024 * 1024) val producers = (1 to numProducers).map(i => new ProducerThread(blockManager, i)) val consumers = producers.map(p => new ConsumerThread(blockManager, p.queue)) diff --git a/core/src/test/scala/spark/JavaAPISuite.java b/core/src/test/scala/spark/JavaAPISuite.java index 01351de4ae..42ce6f3c74 100644 --- a/core/src/test/scala/spark/JavaAPISuite.java +++ b/core/src/test/scala/spark/JavaAPISuite.java @@ -46,7 +46,7 @@ public class JavaAPISuite implements Serializable { sc.stop(); sc = null; // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port"); + 
System.clearProperty("spark.driver.port"); } static class ReverseIntComparator implements Comparator, Serializable { diff --git a/core/src/test/scala/spark/LocalSparkContext.scala b/core/src/test/scala/spark/LocalSparkContext.scala index b5e31ddae3..ff00dd05dd 100644 --- a/core/src/test/scala/spark/LocalSparkContext.scala +++ b/core/src/test/scala/spark/LocalSparkContext.scala @@ -26,7 +26,7 @@ object LocalSparkContext { def stop(sc: SparkContext) { sc.stop() // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") + System.clearProperty("spark.driver.port") } /** Runs `f` by passing in `sc` and ensures that `sc` is stopped. */ diff --git a/core/src/test/scala/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/spark/MapOutputTrackerSuite.scala index 7d5305f1e0..718107d2b5 100644 --- a/core/src/test/scala/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/spark/MapOutputTrackerSuite.scala @@ -79,7 +79,7 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext { test("remote fetch") { val (actorSystem, boundPort) = AkkaUtils.createActorSystem("test", "localhost", 0) - System.setProperty("spark.master.port", boundPort.toString) + System.setProperty("spark.driver.port", boundPort.toString) val masterTracker = new MapOutputTracker(actorSystem, true) val slaveTracker = new MapOutputTracker(actorSystem, false) masterTracker.registerShuffle(10, 1) diff --git a/docs/configuration.md b/docs/configuration.md index 036a0df480..a7054b4321 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -202,7 +202,7 @@ Apart from these, the following properties are also available, and may be useful 10 Maximum message size to allow in "control plane" communication (for serialized tasks and task - results), in MB. Increase this if your tasks need to send back large results to the master + results), in MB. Increase this if your tasks need to send back large results to the driver (e.g. using collect() on a large dataset). @@ -211,7 +211,7 @@ Apart from these, the following properties are also available, and may be useful 4 Number of actor threads to use for communication. Can be useful to increase on large clusters - when the master has a lot of CPU cores. + when the driver has a lot of CPU cores. @@ -222,17 +222,17 @@ Apart from these, the following properties are also available, and may be useful - spark.master.host + spark.driver.host (local hostname) - Hostname or IP address for the master to listen on. + Hostname or IP address for the driver to listen on. - spark.master.port + spark.driver.port (random) - Port for the master to listen on. + Port for the driver to listen on. 
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 46ab34f063..df7235756d 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -26,7 +26,7 @@ class PySparkTestCase(unittest.TestCase): sys.path = self._old_sys_path # To avoid Akka rebinding to the same port, since it doesn't unbind # immediately on shutdown - self.sc.jvm.System.clearProperty("spark.master.port") + self.sc.jvm.System.clearProperty("spark.driver.port") class TestCheckpoint(PySparkTestCase): diff --git a/repl/src/test/scala/spark/repl/ReplSuite.scala b/repl/src/test/scala/spark/repl/ReplSuite.scala index db78d06d4f..43559b96d3 100644 --- a/repl/src/test/scala/spark/repl/ReplSuite.scala +++ b/repl/src/test/scala/spark/repl/ReplSuite.scala @@ -31,7 +31,7 @@ class ReplSuite extends FunSuite { if (interp.sparkContext != null) interp.sparkContext.stop() // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") + System.clearProperty("spark.driver.port") return out.toString } diff --git a/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala index aa6be95f30..8c322dd698 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala @@ -153,8 +153,8 @@ abstract class NetworkReceiver[T: ClassManifest]() extends Serializable with Log /** A helper actor that communicates with the NetworkInputTracker */ private class NetworkReceiverActor extends Actor { logInfo("Attempting to register with tracker") - val ip = System.getProperty("spark.master.host", "localhost") - val port = System.getProperty("spark.master.port", "7077").toInt + val ip = System.getProperty("spark.driver.host", "localhost") + val port = System.getProperty("spark.driver.port", "7077").toInt val url = "akka://spark@%s:%s/user/NetworkInputTracker".format(ip, port) val tracker = env.actorSystem.actorFor(url) val timeout = 5.seconds diff --git a/streaming/src/test/java/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/spark/streaming/JavaAPISuite.java index c84e7331c7..79d6093429 100644 --- a/streaming/src/test/java/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/spark/streaming/JavaAPISuite.java @@ -43,7 +43,7 @@ public class JavaAPISuite implements Serializable { ssc = null; // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port"); + System.clearProperty("spark.driver.port"); } @Test diff --git a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala b/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala index bfdf32c73e..4a036f0710 100644 --- a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala @@ -10,7 +10,7 @@ class BasicOperationsSuite extends TestSuiteBase { after { // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") + System.clearProperty("spark.driver.port") } test("map") { diff --git a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala index d2f32c189b..563a7d1458 100644 --- a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala +++ 
b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala @@ -19,7 +19,7 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { FileUtils.deleteDirectory(new File(checkpointDir)) // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") + System.clearProperty("spark.driver.port") } var ssc: StreamingContext = null diff --git a/streaming/src/test/scala/spark/streaming/FailureSuite.scala b/streaming/src/test/scala/spark/streaming/FailureSuite.scala index 7493ac1207..c4cfffbfc1 100644 --- a/streaming/src/test/scala/spark/streaming/FailureSuite.scala +++ b/streaming/src/test/scala/spark/streaming/FailureSuite.scala @@ -24,7 +24,7 @@ class FailureSuite extends TestSuiteBase with BeforeAndAfter { FileUtils.deleteDirectory(new File(checkpointDir)) // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") + System.clearProperty("spark.driver.port") } override def framework = "CheckpointSuite" diff --git a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala index d7ba7a5d17..70ae6e3934 100644 --- a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala @@ -42,7 +42,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { } // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") + System.clearProperty("spark.driver.port") } test("network input stream") { diff --git a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala b/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala index 0c6e928835..cd9608df53 100644 --- a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala @@ -13,7 +13,7 @@ class WindowOperationsSuite extends TestSuiteBase { after { // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.master.port") + System.clearProperty("spark.driver.port") } val largerSlideInput = Seq( From 539491bbc333834b9ae2721ae6cf3524cefb91ea Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 25 Jan 2013 09:29:59 -0800 Subject: [PATCH 201/291] code reformatting --- core/src/main/scala/spark/RDD.scala | 4 ++-- core/src/main/scala/spark/storage/BlockManagerUI.scala | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 870cc5ca78..4fcab9279a 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -94,7 +94,7 @@ abstract class RDD[T: ClassManifest]( /** How this RDD depends on any parent RDDs. */ protected def getDependencies(): List[Dependency[_]] = dependencies_ - // A friendly name for this RDD + /** A friendly name for this RDD */ var name: String = null /** Optionally overridden by subclasses to specify placement preferences. */ @@ -111,7 +111,7 @@ abstract class RDD[T: ClassManifest]( /** A unique ID for this RDD (within its SparkContext). 
*/ val id = sc.newRddId() - /* Assign a name to this RDD */ + /** Assign a name to this RDD */ def setName(_name: String) = { name = _name this diff --git a/core/src/main/scala/spark/storage/BlockManagerUI.scala b/core/src/main/scala/spark/storage/BlockManagerUI.scala index 35cbd59280..1003cc7a61 100644 --- a/core/src/main/scala/spark/storage/BlockManagerUI.scala +++ b/core/src/main/scala/spark/storage/BlockManagerUI.scala @@ -57,7 +57,8 @@ class BlockManagerUIDirectives(val actorSystem: ActorSystem, master: ActorRef, val rdds = StorageUtils.rddInfoFromStorageStatus(storageStatusList, sc) - spark.storage.html.index.render(maxMem, remainingMem, diskSpaceUsed, rdds, storageStatusList) + spark.storage.html.index. + render(maxMem, remainingMem, diskSpaceUsed, rdds, storageStatusList) } }}} ~ get { path("rdd") { parameter("id") { id => { completeWith { @@ -67,9 +68,10 @@ class BlockManagerUIDirectives(val actorSystem: ActorSystem, master: ActorRef, val storageStatusList = status.asInstanceOf[ArrayBuffer[StorageStatus]].toArray - val filteredStorageStatusList = StorageUtils.filterStorageStatusByPrefix(storageStatusList, prefix) + val filteredStorageStatusList = StorageUtils. + filterStorageStatusByPrefix(storageStatusList, prefix) - val rddInfo = StorageUtils.rddInfoFromStorageStatus(filteredStorageStatusList, sc).first + val rddInfo = StorageUtils.rddInfoFromStorageStatus(filteredStorageStatusList, sc).head spark.storage.html.rdd.render(rddInfo, filteredStorageStatusList) From 1cadaa164e9f078e4ca483edb9db7fd5507c9e64 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 25 Jan 2013 09:30:21 -0800 Subject: [PATCH 202/291] switch to TimeStampedHashMap for storing persistent Rdds --- core/src/main/scala/spark/SparkContext.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index d994648899..10ceeb3028 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -44,6 +44,7 @@ import scheduler.{ResultTask, ShuffleMapTask, DAGScheduler, TaskScheduler} import spark.scheduler.local.LocalScheduler import spark.scheduler.cluster.{SparkDeploySchedulerBackend, SchedulerBackend, ClusterScheduler} import spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend} +import util.TimeStampedHashMap /** * Main entry point for Spark functionality. A SparkContext represents the connection to a Spark @@ -110,7 +111,7 @@ class SparkContext( private[spark] val addedJars = HashMap[String, Long]() // Keeps track of all persisted RDDs - private[spark] val persistentRdds = new ConcurrentHashMap[Int, RDD[_]]() + private[spark] val persistentRdds = new TimeStampedHashMap[Int, RDD[_]]() // Add each JAR given through the constructor jars.foreach { addJar(_) } From a1d9d1767d821c1e25e485e32d9356b12aba6a01 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 25 Jan 2013 10:05:26 -0800 Subject: [PATCH 203/291] fixup 1cadaa1, changed api of map --- core/src/main/scala/spark/storage/StorageUtils.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/storage/StorageUtils.scala b/core/src/main/scala/spark/storage/StorageUtils.scala index 63ad5c125b..a10e3a95c6 100644 --- a/core/src/main/scala/spark/storage/StorageUtils.scala +++ b/core/src/main/scala/spark/storage/StorageUtils.scala @@ -56,8 +56,8 @@ object StorageUtils { // Find the id of the RDD, e.g. 
rdd_1 => 1 val rddId = rddKey.split("_").last.toInt // Get the friendly name for the rdd, if available. - val rddName = Option(sc.persistentRdds.get(rddId).name).getOrElse(rddKey) - val rddStorageLevel = sc.persistentRdds.get(rddId).getStorageLevel + val rddName = Option(sc.persistentRdds(rddId).name).getOrElse(rddKey) + val rddStorageLevel = sc.persistentRdds(rddId).getStorageLevel RDDInfo(rddId, rddName, rddStorageLevel, rddBlocks.length, memSize, diskSize) }.toArray From 8efbda0b179e3821a1221c6d78681fc74248cdac Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Fri, 25 Jan 2013 14:55:33 -0600 Subject: [PATCH 204/291] Call executeOnCompleteCallbacks in more finally blocks. --- .../scala/spark/scheduler/DAGScheduler.scala | 13 ++--- .../spark/scheduler/ShuffleMapTask.scala | 50 +++++++++---------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index b320be8863..f599eb00bd 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -40,7 +40,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with eventQueue.put(HostLost(host)) } - // Called by TaskScheduler to cancel an entier TaskSet due to repeated failures. + // Called by TaskScheduler to cancel an entire TaskSet due to repeated failures. override def taskSetFailed(taskSet: TaskSet, reason: String) { eventQueue.put(TaskSetFailed(taskSet, reason)) } @@ -54,8 +54,6 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with // resubmit failed stages val POLL_TIMEOUT = 10L - private val lock = new Object // Used for access to the entire DAGScheduler - private val eventQueue = new LinkedBlockingQueue[DAGSchedulerEvent] val nextRunId = new AtomicInteger(0) @@ -337,9 +335,12 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with val rdd = job.finalStage.rdd val split = rdd.splits(job.partitions(0)) val taskContext = new TaskContext(job.finalStage.id, job.partitions(0), 0) - val result = job.func(taskContext, rdd.iterator(split, taskContext)) - taskContext.executeOnCompleteCallbacks() - job.listener.taskSucceeded(0, result) + try { + val result = job.func(taskContext, rdd.iterator(split, taskContext)) + job.listener.taskSucceeded(0, result) + } finally { + taskContext.executeOnCompleteCallbacks() + } } catch { case e: Exception => job.listener.jobFailed(e) diff --git a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala index 19f5328eee..83641a2a84 100644 --- a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala @@ -81,7 +81,7 @@ private[spark] class ShuffleMapTask( with Externalizable with Logging { - def this() = this(0, null, null, 0, null) + protected def this() = this(0, null, null, 0, null) var split = if (rdd == null) { null @@ -117,34 +117,34 @@ private[spark] class ShuffleMapTask( override def run(attemptId: Long): MapStatus = { val numOutputSplits = dep.partitioner.numPartitions - val partitioner = dep.partitioner val taskContext = new TaskContext(stageId, partition, attemptId) + try { + // Partition the map output. 
+ val buckets = Array.fill(numOutputSplits)(new ArrayBuffer[(Any, Any)]) + for (elem <- rdd.iterator(split, taskContext)) { + val pair = elem.asInstanceOf[(Any, Any)] + val bucketId = dep.partitioner.getPartition(pair._1) + buckets(bucketId) += pair + } + val bucketIterators = buckets.map(_.iterator) - // Partition the map output. - val buckets = Array.fill(numOutputSplits)(new ArrayBuffer[(Any, Any)]) - for (elem <- rdd.iterator(split, taskContext)) { - val pair = elem.asInstanceOf[(Any, Any)] - val bucketId = partitioner.getPartition(pair._1) - buckets(bucketId) += pair + val compressedSizes = new Array[Byte](numOutputSplits) + + val blockManager = SparkEnv.get.blockManager + for (i <- 0 until numOutputSplits) { + val blockId = "shuffle_" + dep.shuffleId + "_" + partition + "_" + i + // Get a Scala iterator from Java map + val iter: Iterator[(Any, Any)] = bucketIterators(i) + val size = blockManager.put(blockId, iter, StorageLevel.DISK_ONLY, false) + compressedSizes(i) = MapOutputTracker.compressSize(size) + } + + return new MapStatus(blockManager.blockManagerId, compressedSizes) + } finally { + // Execute the callbacks on task completion. + taskContext.executeOnCompleteCallbacks() } - val bucketIterators = buckets.map(_.iterator) - - val compressedSizes = new Array[Byte](numOutputSplits) - - val blockManager = SparkEnv.get.blockManager - for (i <- 0 until numOutputSplits) { - val blockId = "shuffle_" + dep.shuffleId + "_" + partition + "_" + i - // Get a Scala iterator from Java map - val iter: Iterator[(Any, Any)] = bucketIterators(i) - val size = blockManager.put(blockId, iter, StorageLevel.DISK_ONLY, false) - compressedSizes(i) = MapOutputTracker.compressSize(size) - } - - // Execute the callbacks on task completion. - taskContext.executeOnCompleteCallbacks() - - return new MapStatus(blockManager.blockManagerId, compressedSizes) } override def preferredLocations: Seq[String] = locs From 49c05608f5f27354da120e2367b6d4a63ec38948 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 25 Jan 2013 17:04:16 -0800 Subject: [PATCH 205/291] add metadatacleaner for persisentRdd map --- core/src/main/scala/spark/SparkContext.scala | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 10ceeb3028..bff54dbdd1 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -44,7 +44,7 @@ import scheduler.{ResultTask, ShuffleMapTask, DAGScheduler, TaskScheduler} import spark.scheduler.local.LocalScheduler import spark.scheduler.cluster.{SparkDeploySchedulerBackend, SchedulerBackend, ClusterScheduler} import spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend} -import util.TimeStampedHashMap +import util.{MetadataCleaner, TimeStampedHashMap} /** * Main entry point for Spark functionality. A SparkContext represents the connection to a Spark @@ -113,6 +113,9 @@ class SparkContext( // Keeps track of all persisted RDDs private[spark] val persistentRdds = new TimeStampedHashMap[Int, RDD[_]]() + private[spark] val metadataCleaner = new MetadataCleaner("DAGScheduler", this.cleanup) + + // Add each JAR given through the constructor jars.foreach { addJar(_) } @@ -512,6 +515,7 @@ class SparkContext( /** Shut down the SparkContext. 
*/ def stop() { if (dagScheduler != null) { + metadataCleaner.cancel() dagScheduler.stop() dagScheduler = null taskScheduler = null @@ -654,6 +658,12 @@ class SparkContext( /** Register a new RDD, returning its RDD ID */ private[spark] def newRddId(): Int = nextRddId.getAndIncrement() + + private[spark] def cleanup(cleanupTime: Long) { + var sizeBefore = persistentRdds.size + persistentRdds.clearOldValues(cleanupTime) + logInfo("idToStage " + sizeBefore + " --> " + persistentRdds.size) + } } /** From d49cf0e587b7cbbd31917d9bb69f98466feb0f9f Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 26 Jan 2013 15:57:01 -0800 Subject: [PATCH 206/291] Fix JavaRDDLike.flatMap(PairFlatMapFunction) (SPARK-668). This workaround is easier than rewriting JavaRDDLike in Java. --- .../scala/spark/api/java/JavaRDDLike.scala | 7 ++--- .../spark/api/java/PairFlatMapWorkaround.java | 20 +++++++++++++ core/src/test/scala/spark/JavaAPISuite.java | 28 +++++++++++++++++++ 3 files changed, 51 insertions(+), 4 deletions(-) create mode 100644 core/src/main/scala/spark/api/java/PairFlatMapWorkaround.java diff --git a/core/src/main/scala/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/spark/api/java/JavaRDDLike.scala index b3698ffa44..4c95c989b5 100644 --- a/core/src/main/scala/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/spark/api/java/JavaRDDLike.scala @@ -12,7 +12,7 @@ import spark.storage.StorageLevel import com.google.common.base.Optional -trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { +trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends PairFlatMapWorkaround[T] { def wrapRDD(rdd: RDD[T]): This implicit val classManifest: ClassManifest[T] @@ -82,10 +82,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { } /** - * Return a new RDD by first applying a function to all elements of this - * RDD, and then flattening the results. + * Part of the workaround for SPARK-668; called in PairFlatMapWorkaround.java. */ - def flatMap[K, V](f: PairFlatMapFunction[T, K, V]): JavaPairRDD[K, V] = { + private[spark] def doFlatMap[K, V](f: PairFlatMapFunction[T, K, V]): JavaPairRDD[K, V] = { import scala.collection.JavaConverters._ def fn = (x: T) => f.apply(x).asScala def cm = implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[Tuple2[K, V]]] diff --git a/core/src/main/scala/spark/api/java/PairFlatMapWorkaround.java b/core/src/main/scala/spark/api/java/PairFlatMapWorkaround.java new file mode 100644 index 0000000000..68b6fd6622 --- /dev/null +++ b/core/src/main/scala/spark/api/java/PairFlatMapWorkaround.java @@ -0,0 +1,20 @@ +package spark.api.java; + +import spark.api.java.JavaPairRDD; +import spark.api.java.JavaRDDLike; +import spark.api.java.function.PairFlatMapFunction; + +import java.io.Serializable; + +/** + * Workaround for SPARK-668. + */ +class PairFlatMapWorkaround implements Serializable { + /** + * Return a new RDD by first applying a function to all elements of this + * RDD, and then flattening the results. 
+ */ + public JavaPairRDD flatMap(PairFlatMapFunction f) { + return ((JavaRDDLike ) this).doFlatMap(f); + } +} diff --git a/core/src/test/scala/spark/JavaAPISuite.java b/core/src/test/scala/spark/JavaAPISuite.java index 01351de4ae..f50ba093e9 100644 --- a/core/src/test/scala/spark/JavaAPISuite.java +++ b/core/src/test/scala/spark/JavaAPISuite.java @@ -355,6 +355,34 @@ public class JavaAPISuite implements Serializable { Assert.assertEquals(11, pairs.count()); } + @Test + public void mapsFromPairsToPairs() { + List> pairs = Arrays.asList( + new Tuple2(1, "a"), + new Tuple2(2, "aa"), + new Tuple2(3, "aaa") + ); + JavaPairRDD pairRDD = sc.parallelizePairs(pairs); + + // Regression test for SPARK-668: + JavaPairRDD swapped = pairRDD.flatMap( + new PairFlatMapFunction, String, Integer>() { + @Override + public Iterable> call(Tuple2 item) throws Exception { + return Collections.singletonList(item.swap()); + } + }); + swapped.collect(); + + // There was never a bug here, but it's worth testing: + pairRDD.map(new PairFunction, String, Integer>() { + @Override + public Tuple2 call(Tuple2 item) throws Exception { + return item.swap(); + } + }).collect(); + } + @Test public void mapPartitions() { JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2); From ad4232b4dadc6290d3c4696d3cc007d3f01cb236 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Sat, 26 Jan 2013 18:07:14 -0800 Subject: [PATCH 207/291] Fix deadlock in BlockManager reregistration triggered by failed updates. --- .../scala/spark/storage/BlockManager.scala | 35 +++++++++++++++- .../spark/storage/BlockManagerSuite.scala | 40 ++++++++++++++++++- 2 files changed, 72 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/spark/storage/BlockManager.scala b/core/src/main/scala/spark/storage/BlockManager.scala index 19cdaaa984..19d35b8667 100644 --- a/core/src/main/scala/spark/storage/BlockManager.scala +++ b/core/src/main/scala/spark/storage/BlockManager.scala @@ -90,7 +90,10 @@ class BlockManager( val slaveActor = master.actorSystem.actorOf(Props(new BlockManagerSlaveActor(this)), name = "BlockManagerActor" + BlockManager.ID_GENERATOR.next) - @volatile private var shuttingDown = false + // Pending reregistration action being executed asynchronously or null if none + // is pending. Accesses should synchronize on asyncReregisterLock. + var asyncReregisterTask: Future[Unit] = null + val asyncReregisterLock = new Object private def heartBeat() { if (!master.sendHeartBeat(blockManagerId)) { @@ -147,6 +150,8 @@ class BlockManager( /** * Reregister with the master and report all blocks to it. This will be called by the heart beat * thread if our heartbeat to the block amnager indicates that we were not registered. + * + * Note that this method must be called without any BlockInfo locks held. */ def reregister() { // TODO: We might need to rate limit reregistering. @@ -155,6 +160,32 @@ class BlockManager( reportAllBlocks() } + /** + * Reregister with the master sometime soon. + */ + def asyncReregister() { + asyncReregisterLock.synchronized { + if (asyncReregisterTask == null) { + asyncReregisterTask = Future[Unit] { + reregister() + asyncReregisterLock.synchronized { + asyncReregisterTask = null + } + } + } + } + } + + /** + * For testing. Wait for any pending asynchronous reregistration; otherwise, do nothing. + */ + def waitForAsyncReregister() { + val task = asyncReregisterTask + if (task != null) { + Await.ready(task, Duration.Inf) + } + } + /** * Get storage level of local block. If no info exists for the block, then returns null. 
*/ @@ -170,7 +201,7 @@ class BlockManager( if (needReregister) { logInfo("Got told to reregister updating block " + blockId) // Reregistering will report our new block for free. - reregister() + asyncReregister() } logDebug("Told master about block " + blockId) } diff --git a/core/src/test/scala/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/spark/storage/BlockManagerSuite.scala index a1aeb12f25..2165744689 100644 --- a/core/src/test/scala/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/spark/storage/BlockManagerSuite.scala @@ -219,18 +219,56 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT val a2 = new Array[Byte](400) store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY) - assert(master.getLocations("a1").size > 0, "master was not told about a1") master.notifyADeadHost(store.blockManagerId.ip) assert(master.getLocations("a1").size == 0, "a1 was not removed from master") store.putSingle("a2", a1, StorageLevel.MEMORY_ONLY) + store.waitForAsyncReregister() assert(master.getLocations("a1").size > 0, "a1 was not reregistered with master") assert(master.getLocations("a2").size > 0, "master was not told about a2") } + test("reregistration doesn't dead lock") { + val heartBeat = PrivateMethod[Unit]('heartBeat) + store = new BlockManager(actorSystem, master, serializer, 2000) + val a1 = new Array[Byte](400) + val a2 = List(new Array[Byte](400)) + + // try many times to trigger any deadlocks + for (i <- 1 to 100) { + master.notifyADeadHost(store.blockManagerId.ip) + val t1 = new Thread { + override def run = { + store.put("a2", a2.iterator, StorageLevel.MEMORY_ONLY, true) + } + } + val t2 = new Thread { + override def run = { + store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY) + } + } + val t3 = new Thread { + override def run = { + store invokePrivate heartBeat() + } + } + + t1.start + t2.start + t3.start + t1.join + t2.join + t3.join + + store.dropFromMemory("a1", null) + store.dropFromMemory("a2", null) + store.waitForAsyncReregister() + } + } + test("in-memory LRU storage") { store = new BlockManager(actorSystem, master, serializer, 1200) val a1 = new Array[Byte](400) From 58fc6b2bed9f660fbf134aab188827b7d8975a62 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Sat, 26 Jan 2013 18:07:53 -0800 Subject: [PATCH 208/291] Handle duplicate registrations better. --- core/src/main/scala/spark/storage/BlockManagerMasterActor.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala b/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala index f4d026da33..2216c33b76 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala @@ -183,7 +183,7 @@ class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { if (blockManagerId.ip == Utils.localHostName() && !isLocal) { logInfo("Got Register Msg from master node, don't register it") - } else { + } else if (!blockManagerInfo.contains(blockManagerId)) { blockManagerIdByHost.get(blockManagerId.ip) match { case Some(managers) => // A block manager of the same host name already exists. 
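Patches 207 and 208 above change how a BlockManager re-registers with the driver: a failed heartbeat or block update no longer calls reregister() inline (which could deadlock while BlockInfo locks are held) but schedules it asynchronously, and the master actor now ignores a registration for an ID it already knows about. A minimal standalone sketch of the asynchronous guard, assuming scala.concurrent.Future rather than Spark's internal BlockManager state:

    import scala.concurrent.{Await, Future}
    import scala.concurrent.ExecutionContext.Implicits.global
    import scala.concurrent.duration.Duration

    // Illustrative only: mirrors asyncReregister()/waitForAsyncReregister() from the patch.
    class Reregistrar(reregister: () => Unit) {
      private var pendingTask: Future[Unit] = null   // pending re-registration, or null
      private val lock = new Object                  // guards pendingTask

      def asyncReregister(): Unit = lock.synchronized {
        if (pendingTask == null) {                   // at most one re-registration in flight
          pendingTask = Future {
            reregister()
            lock.synchronized { pendingTask = null }
          }
        }
      }

      // Test helper: block until any pending re-registration has completed.
      def waitForAsyncReregister(): Unit = {
        val task = pendingTask
        if (task != null) Await.ready(task, Duration.Inf)
      }
    }

The "reregistration doesn't dead lock" test added in BlockManagerSuite exercises the same idea by racing puts, a heartbeat, and the re-registration from several threads.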
From 717b221cca79cb8a1603e9dcf7f0bb50e215ac41 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Sat, 26 Jan 2013 22:59:22 -0800 Subject: [PATCH 209/291] Detect whether we run on EC2 using ec2-metadata as well --- bin/start-master.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/start-master.sh b/bin/start-master.sh index a901b1c260..87feb261fe 100755 --- a/bin/start-master.sh +++ b/bin/start-master.sh @@ -26,7 +26,8 @@ fi # Set SPARK_PUBLIC_DNS so the master report the correct webUI address to the slaves if [ "$SPARK_PUBLIC_DNS" = "" ]; then # If we appear to be running on EC2, use the public address by default: - if [[ `hostname` == *ec2.internal ]]; then + # NOTE: ec2-metadata is installed on Amazon Linux AMI. Check based on that and hostname + if command -v ec2-metadata > /dev/null || [[ `hostname` == *ec2.internal ]]; then export SPARK_PUBLIC_DNS=`wget -q -O - http://instance-data.ec2.internal/latest/meta-data/public-hostname` fi fi From 44b4a0f88fcb31727347b755ae8ec14d69571b52 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 27 Jan 2013 19:23:49 -0800 Subject: [PATCH 210/291] Track workers by executor ID instead of hostname to allow multiple executors per machine and remove the need for multiple IP addresses in unit tests. --- .../main/scala/spark/MapOutputTracker.scala | 4 +- core/src/main/scala/spark/SparkContext.scala | 6 +- core/src/main/scala/spark/SparkEnv.scala | 9 +- .../spark/deploy/LocalSparkCluster.scala | 16 +-- .../scala/spark/deploy/master/Master.scala | 4 +- .../spark/deploy/worker/ExecutorRunner.scala | 2 +- .../main/scala/spark/executor/Executor.scala | 4 +- .../spark/executor/MesosExecutorBackend.scala | 3 +- .../executor/StandaloneExecutorBackend.scala | 14 +-- .../scala/spark/scheduler/DAGScheduler.scala | 44 +++---- .../spark/scheduler/DAGSchedulerEvent.scala | 2 +- .../scala/spark/scheduler/MapStatus.scala | 6 +- .../main/scala/spark/scheduler/Stage.scala | 11 +- .../scheduler/TaskSchedulerListener.scala | 2 +- .../scheduler/cluster/ClusterScheduler.scala | 110 ++++++++++-------- .../cluster/SparkDeploySchedulerBackend.scala | 4 +- .../cluster/StandaloneSchedulerBackend.scala | 64 +++++----- .../scheduler/cluster/TaskDescription.scala | 2 +- .../spark/scheduler/cluster/TaskInfo.scala | 7 +- .../scheduler/cluster/TaskSetManager.scala | 38 +++--- .../spark/scheduler/cluster/WorkerOffer.scala | 4 +- .../mesos/MesosSchedulerBackend.scala | 2 +- .../scala/spark/storage/BlockManager.scala | 10 +- .../scala/spark/storage/BlockManagerId.scala | 27 +++-- .../spark/storage/BlockManagerMaster.scala | 12 +- .../storage/BlockManagerMasterActor.scala | 66 +++++------ .../spark/storage/BlockManagerMessages.scala | 2 +- .../scala/spark/storage/BlockManagerUI.scala | 7 +- .../scala/spark/storage/ThreadingTest.scala | 3 +- .../src/main/scala/spark/util/AkkaUtils.scala | 6 +- .../scala/spark/util/TimeStampedHashMap.scala | 4 +- core/src/test/scala/spark/DriverSuite.scala | 5 +- .../scala/spark/MapOutputTrackerSuite.scala | 69 ++++++----- .../spark/storage/BlockManagerSuite.scala | 86 +++++++------- sbt/sbt | 2 +- 35 files changed, 343 insertions(+), 314 deletions(-) diff --git a/core/src/main/scala/spark/MapOutputTracker.scala b/core/src/main/scala/spark/MapOutputTracker.scala index ac02f3363a..c1f012b419 100644 --- a/core/src/main/scala/spark/MapOutputTracker.scala +++ b/core/src/main/scala/spark/MapOutputTracker.scala @@ -114,7 +114,7 @@ private[spark] class MapOutputTracker(actorSystem: ActorSystem, isMaster: Boolea var array = 
mapStatuses(shuffleId) if (array != null) { array.synchronized { - if (array(mapId) != null && array(mapId).address == bmAddress) { + if (array(mapId) != null && array(mapId).location == bmAddress) { array(mapId) = null } } @@ -277,7 +277,7 @@ private[spark] object MapOutputTracker { throw new FetchFailedException(null, shuffleId, -1, reduceId, new Exception("Missing an output location for shuffle " + shuffleId)) } else { - (status.address, decompressSize(status.compressedSizes(reduceId))) + (status.location, decompressSize(status.compressedSizes(reduceId))) } } } diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 4581c0adcf..39721b47ae 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -80,6 +80,7 @@ class SparkContext( // Create the Spark execution environment (cache, map output tracker, etc) private[spark] val env = SparkEnv.createFromSystemProperties( + "", System.getProperty("spark.master.host"), System.getProperty("spark.master.port").toInt, true, @@ -97,7 +98,7 @@ class SparkContext( // Keeps track of all persisted RDDs private[spark] val persistentRdds = new TimeStampedHashMap[Int, RDD[_]]() - private[spark] val metadataCleaner = new MetadataCleaner("DAGScheduler", this.cleanup) + private[spark] val metadataCleaner = new MetadataCleaner("SparkContext", this.cleanup) // Add each JAR given through the constructor @@ -649,10 +650,9 @@ class SparkContext( /** Register a new RDD, returning its RDD ID */ private[spark] def newRddId(): Int = nextRddId.getAndIncrement() + /** Called by MetadataCleaner to clean up the persistentRdds map periodically */ private[spark] def cleanup(cleanupTime: Long) { - var sizeBefore = persistentRdds.size persistentRdds.clearOldValues(cleanupTime) - logInfo("idToStage " + sizeBefore + " --> " + persistentRdds.size) } } diff --git a/core/src/main/scala/spark/SparkEnv.scala b/core/src/main/scala/spark/SparkEnv.scala index 2a7a8af83d..0c094edcf3 100644 --- a/core/src/main/scala/spark/SparkEnv.scala +++ b/core/src/main/scala/spark/SparkEnv.scala @@ -19,6 +19,7 @@ import spark.util.AkkaUtils * SparkEnv.get (e.g. after creating a SparkContext) and set it with SparkEnv.set. 
*/ class SparkEnv ( + val executorId: String, val actorSystem: ActorSystem, val serializer: Serializer, val closureSerializer: Serializer, @@ -58,11 +59,12 @@ object SparkEnv extends Logging { } def createFromSystemProperties( + executorId: String, hostname: String, port: Int, isMaster: Boolean, - isLocal: Boolean - ) : SparkEnv = { + isLocal: Boolean): SparkEnv = { + val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, port) // Bit of a hack: If this is the master and our port was 0 (meaning bind to any free port), @@ -86,7 +88,7 @@ object SparkEnv extends Logging { val masterPort: Int = System.getProperty("spark.master.port", "7077").toInt val blockManagerMaster = new BlockManagerMaster( actorSystem, isMaster, isLocal, masterIp, masterPort) - val blockManager = new BlockManager(actorSystem, blockManagerMaster, serializer) + val blockManager = new BlockManager(executorId, actorSystem, blockManagerMaster, serializer) val connectionManager = blockManager.connectionManager @@ -122,6 +124,7 @@ object SparkEnv extends Logging { } new SparkEnv( + executorId, actorSystem, serializer, closureSerializer, diff --git a/core/src/main/scala/spark/deploy/LocalSparkCluster.scala b/core/src/main/scala/spark/deploy/LocalSparkCluster.scala index 4211d80596..8f51051e39 100644 --- a/core/src/main/scala/spark/deploy/LocalSparkCluster.scala +++ b/core/src/main/scala/spark/deploy/LocalSparkCluster.scala @@ -9,6 +9,12 @@ import spark.{Logging, Utils} import scala.collection.mutable.ArrayBuffer +/** + * Testing class that creates a Spark standalone process in-cluster (that is, running the + * spark.deploy.master.Master and spark.deploy.worker.Workers in the same JVMs). Executors launched + * by the Workers still run in separate JVMs. This can be used to test distributed operation and + * fault recovery without spinning up a lot of processes. + */ private[spark] class LocalSparkCluster(numSlaves: Int, coresPerSlave: Int, memoryPerSlave: Int) extends Logging { @@ -35,16 +41,12 @@ class LocalSparkCluster(numSlaves: Int, coresPerSlave: Int, memoryPerSlave: Int) /* Start the Slaves */ for (slaveNum <- 1 to numSlaves) { - /* We can pretend to test distributed stuff by giving the slaves distinct hostnames. - All of 127/8 should be a loopback, we use 127.100.*.* in hopes that it is - sufficiently distinctive. */ - val slaveIpAddress = "127.100.0." + (slaveNum % 256) val (actorSystem, boundPort) = - AkkaUtils.createActorSystem("sparkWorker" + slaveNum, slaveIpAddress, 0) + AkkaUtils.createActorSystem("sparkWorker" + slaveNum, localIpAddress, 0) slaveActorSystems += actorSystem val actor = actorSystem.actorOf( - Props(new Worker(slaveIpAddress, boundPort, 0, coresPerSlave, memoryPerSlave, masterUrl)), - name = "Worker") + Props(new Worker(localIpAddress, boundPort, 0, coresPerSlave, memoryPerSlave, masterUrl)), + name = "Worker") slaveActors += actor } diff --git a/core/src/main/scala/spark/deploy/master/Master.scala b/core/src/main/scala/spark/deploy/master/Master.scala index 2c2cd0231b..2e7e868579 100644 --- a/core/src/main/scala/spark/deploy/master/Master.scala +++ b/core/src/main/scala/spark/deploy/master/Master.scala @@ -97,10 +97,10 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor exec.worker.removeExecutor(exec) // Only retry certain number of times so we don't go into an infinite loop. 
- if (jobInfo.incrementRetryCount <= JobState.MAX_NUM_RETRY) { + if (jobInfo.incrementRetryCount < JobState.MAX_NUM_RETRY) { schedule() } else { - val e = new SparkException("Job %s wth ID %s failed %d times.".format( + val e = new SparkException("Job %s with ID %s failed %d times.".format( jobInfo.desc.name, jobInfo.id, jobInfo.retryCount)) logError(e.getMessage, e) throw e diff --git a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala index 0d1fe2a6b4..af3acfecb6 100644 --- a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala @@ -67,7 +67,7 @@ private[spark] class ExecutorRunner( /** Replace variables such as {{SLAVEID}} and {{CORES}} in a command argument passed to us */ def substituteVariables(argument: String): String = argument match { - case "{{SLAVEID}}" => workerId + case "{{EXECUTOR_ID}}" => execId.toString case "{{HOSTNAME}}" => hostname case "{{CORES}}" => cores.toString case other => other diff --git a/core/src/main/scala/spark/executor/Executor.scala b/core/src/main/scala/spark/executor/Executor.scala index 28d9d40d43..bd21ba719a 100644 --- a/core/src/main/scala/spark/executor/Executor.scala +++ b/core/src/main/scala/spark/executor/Executor.scala @@ -30,7 +30,7 @@ private[spark] class Executor extends Logging { initLogging() - def initialize(slaveHostname: String, properties: Seq[(String, String)]) { + def initialize(executorId: String, slaveHostname: String, properties: Seq[(String, String)]) { // Make sure the local hostname we report matches the cluster scheduler's name for this host Utils.setCustomHostname(slaveHostname) @@ -64,7 +64,7 @@ private[spark] class Executor extends Logging { ) // Initialize Spark environment (using system properties read above) - env = SparkEnv.createFromSystemProperties(slaveHostname, 0, false, false) + env = SparkEnv.createFromSystemProperties(executorId, slaveHostname, 0, false, false) SparkEnv.set(env) // Start worker thread pool diff --git a/core/src/main/scala/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/spark/executor/MesosExecutorBackend.scala index eeab3959c6..1ef88075ad 100644 --- a/core/src/main/scala/spark/executor/MesosExecutorBackend.scala +++ b/core/src/main/scala/spark/executor/MesosExecutorBackend.scala @@ -29,9 +29,10 @@ private[spark] class MesosExecutorBackend(executor: Executor) executorInfo: ExecutorInfo, frameworkInfo: FrameworkInfo, slaveInfo: SlaveInfo) { + logInfo("Registered with Mesos as executor ID " + executorInfo.getExecutorId.getValue) this.driver = driver val properties = Utils.deserialize[Array[(String, String)]](executorInfo.getData.toByteArray) - executor.initialize(slaveInfo.getHostname, properties) + executor.initialize(executorInfo.getExecutorId.getValue, slaveInfo.getHostname, properties) } override def launchTask(d: ExecutorDriver, taskInfo: TaskInfo) { diff --git a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala index a29bf974d2..435ee5743e 100644 --- a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala +++ b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala @@ -17,7 +17,7 @@ import spark.scheduler.cluster.RegisterSlave private[spark] class StandaloneExecutorBackend( executor: Executor, masterUrl: String, - slaveId: String, + executorId: String, hostname: String, cores: Int) extends Actor @@ -30,7 +30,7 @@ 
private[spark] class StandaloneExecutorBackend( try { logInfo("Connecting to master: " + masterUrl) master = context.actorFor(masterUrl) - master ! RegisterSlave(slaveId, hostname, cores) + master ! RegisterSlave(executorId, hostname, cores) context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) context.watch(master) // Doesn't work with remote actors, but useful for testing } catch { @@ -43,7 +43,7 @@ private[spark] class StandaloneExecutorBackend( override def receive = { case RegisteredSlave(sparkProperties) => logInfo("Successfully registered with master") - executor.initialize(hostname, sparkProperties) + executor.initialize(executorId, hostname, sparkProperties) case RegisterSlaveFailed(message) => logError("Slave registration failed: " + message) @@ -55,24 +55,24 @@ private[spark] class StandaloneExecutorBackend( } override def statusUpdate(taskId: Long, state: TaskState, data: ByteBuffer) { - master ! StatusUpdate(slaveId, taskId, state, data) + master ! StatusUpdate(executorId, taskId, state, data) } } private[spark] object StandaloneExecutorBackend { - def run(masterUrl: String, slaveId: String, hostname: String, cores: Int) { + def run(masterUrl: String, executorId: String, hostname: String, cores: Int) { // Create a new ActorSystem to run the backend, because we can't create a SparkEnv / Executor // before getting started with all our system properties, etc val (actorSystem, boundPort) = AkkaUtils.createActorSystem("sparkExecutor", hostname, 0) val actor = actorSystem.actorOf( - Props(new StandaloneExecutorBackend(new Executor, masterUrl, slaveId, hostname, cores)), + Props(new StandaloneExecutorBackend(new Executor, masterUrl, executorId, hostname, cores)), name = "Executor") actorSystem.awaitTermination() } def main(args: Array[String]) { if (args.length != 4) { - System.err.println("Usage: StandaloneExecutorBackend ") + System.err.println("Usage: StandaloneExecutorBackend ") System.exit(1) } run(args(0), args(1), args(2), args(3).toInt) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index f599eb00bd..bd541d4207 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -35,9 +35,9 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with eventQueue.put(CompletionEvent(task, reason, result, accumUpdates)) } - // Called by TaskScheduler when a host fails. - override def hostLost(host: String) { - eventQueue.put(HostLost(host)) + // Called by TaskScheduler when an executor fails. + override def executorLost(execId: String) { + eventQueue.put(ExecutorLost(execId)) } // Called by TaskScheduler to cancel an entire TaskSet due to repeated failures. @@ -72,7 +72,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with // For tracking failed nodes, we use the MapOutputTracker's generation number, which is // sent with every task. When we detect a node failing, we note the current generation number - // and failed host, increment it for new tasks, and use this to ignore stray ShuffleMapTask + // and failed executor, increment it for new tasks, and use this to ignore stray ShuffleMapTask // results. // TODO: Garbage collect information about failure generations when we know there are no more // stray messages to detect. 
@@ -108,7 +108,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with } def clearCacheLocs() { - cacheLocs.clear + cacheLocs.clear() } /** @@ -271,8 +271,8 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with submitStage(finalStage) } - case HostLost(host) => - handleHostLost(host) + case ExecutorLost(execId) => + handleExecutorLost(execId) case completion: CompletionEvent => handleTaskCompletion(completion) @@ -436,10 +436,10 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with case smt: ShuffleMapTask => val stage = idToStage(smt.stageId) val status = event.result.asInstanceOf[MapStatus] - val host = status.address.ip - logInfo("ShuffleMapTask finished with host " + host) - if (failedGeneration.contains(host) && smt.generation <= failedGeneration(host)) { - logInfo("Ignoring possibly bogus ShuffleMapTask completion from " + host) + val execId = status.location.executorId + logDebug("ShuffleMapTask finished on " + execId) + if (failedGeneration.contains(execId) && smt.generation <= failedGeneration(execId)) { + logInfo("Ignoring possibly bogus ShuffleMapTask completion from " + execId) } else { stage.addOutputLoc(smt.partition, status) } @@ -511,9 +511,9 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with // Remember that a fetch failed now; this is used to resubmit the broken // stages later, after a small wait (to give other tasks the chance to fail) lastFetchFailureTime = System.currentTimeMillis() // TODO: Use pluggable clock - // TODO: mark the host as failed only if there were lots of fetch failures on it + // TODO: mark the executor as failed only if there were lots of fetch failures on it if (bmAddress != null) { - handleHostLost(bmAddress.ip, Some(task.generation)) + handleExecutorLost(bmAddress.executorId, Some(task.generation)) } case other => @@ -523,21 +523,21 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with } /** - * Responds to a host being lost. This is called inside the event loop so it assumes that it can - * modify the scheduler's internal state. Use hostLost() to post a host lost event from outside. + * Responds to an executor being lost. This is called inside the event loop, so it assumes it can + * modify the scheduler's internal state. Use executorLost() to post a loss event from outside. * * Optionally the generation during which the failure was caught can be passed to avoid allowing * stray fetch failures from possibly retriggering the detection of a node as lost. 
*/ - def handleHostLost(host: String, maybeGeneration: Option[Long] = None) { + def handleExecutorLost(execId: String, maybeGeneration: Option[Long] = None) { val currentGeneration = maybeGeneration.getOrElse(mapOutputTracker.getGeneration) - if (!failedGeneration.contains(host) || failedGeneration(host) < currentGeneration) { - failedGeneration(host) = currentGeneration - logInfo("Host lost: " + host + " (generation " + currentGeneration + ")") - env.blockManager.master.notifyADeadHost(host) + if (!failedGeneration.contains(execId) || failedGeneration(execId) < currentGeneration) { + failedGeneration(execId) = currentGeneration + logInfo("Executor lost: %s (generation %d)".format(execId, currentGeneration)) + env.blockManager.master.removeExecutor(execId) // TODO: This will be really slow if we keep accumulating shuffle map stages for ((shuffleId, stage) <- shuffleToMapStage) { - stage.removeOutputsOnHost(host) + stage.removeOutputsOnExecutor(execId) val locs = stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray mapOutputTracker.registerMapOutputs(shuffleId, locs, true) } @@ -546,7 +546,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with } clearCacheLocs() } else { - logDebug("Additional host lost message for " + host + + logDebug("Additional executor lost message for " + execId + "(generation " + currentGeneration + ")") } } diff --git a/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala b/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala index 3422a21d9d..b34fa78c07 100644 --- a/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala +++ b/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala @@ -28,7 +28,7 @@ private[spark] case class CompletionEvent( accumUpdates: Map[Long, Any]) extends DAGSchedulerEvent -private[spark] case class HostLost(host: String) extends DAGSchedulerEvent +private[spark] case class ExecutorLost(execId: String) extends DAGSchedulerEvent private[spark] case class TaskSetFailed(taskSet: TaskSet, reason: String) extends DAGSchedulerEvent diff --git a/core/src/main/scala/spark/scheduler/MapStatus.scala b/core/src/main/scala/spark/scheduler/MapStatus.scala index fae643f3a8..203abb917b 100644 --- a/core/src/main/scala/spark/scheduler/MapStatus.scala +++ b/core/src/main/scala/spark/scheduler/MapStatus.scala @@ -8,19 +8,19 @@ import java.io.{ObjectOutput, ObjectInput, Externalizable} * task ran on as well as the sizes of outputs for each reducer, for passing on to the reduce tasks. * The map output sizes are compressed using MapOutputTracker.compressSize. 
*/ -private[spark] class MapStatus(var address: BlockManagerId, var compressedSizes: Array[Byte]) +private[spark] class MapStatus(var location: BlockManagerId, var compressedSizes: Array[Byte]) extends Externalizable { def this() = this(null, null) // For deserialization only def writeExternal(out: ObjectOutput) { - address.writeExternal(out) + location.writeExternal(out) out.writeInt(compressedSizes.length) out.write(compressedSizes) } def readExternal(in: ObjectInput) { - address = BlockManagerId(in) + location = BlockManagerId(in) compressedSizes = new Array[Byte](in.readInt()) in.readFully(compressedSizes) } diff --git a/core/src/main/scala/spark/scheduler/Stage.scala b/core/src/main/scala/spark/scheduler/Stage.scala index 4846b66729..e9419728e3 100644 --- a/core/src/main/scala/spark/scheduler/Stage.scala +++ b/core/src/main/scala/spark/scheduler/Stage.scala @@ -51,18 +51,18 @@ private[spark] class Stage( def removeOutputLoc(partition: Int, bmAddress: BlockManagerId) { val prevList = outputLocs(partition) - val newList = prevList.filterNot(_.address == bmAddress) + val newList = prevList.filterNot(_.location == bmAddress) outputLocs(partition) = newList if (prevList != Nil && newList == Nil) { numAvailableOutputs -= 1 } } - def removeOutputsOnHost(host: String) { + def removeOutputsOnExecutor(execId: String) { var becameUnavailable = false for (partition <- 0 until numPartitions) { val prevList = outputLocs(partition) - val newList = prevList.filterNot(_.address.ip == host) + val newList = prevList.filterNot(_.location.executorId == execId) outputLocs(partition) = newList if (prevList != Nil && newList == Nil) { becameUnavailable = true @@ -70,7 +70,8 @@ private[spark] class Stage( } } if (becameUnavailable) { - logInfo("%s is now unavailable on %s (%d/%d, %s)".format(this, host, numAvailableOutputs, numPartitions, isAvailable)) + logInfo("%s is now unavailable on executor %s (%d/%d, %s)".format( + this, execId, numAvailableOutputs, numPartitions, isAvailable)) } } @@ -82,7 +83,7 @@ private[spark] class Stage( def origin: String = rdd.origin - override def toString = "Stage " + id // + ": [RDD = " + rdd.id + ", isShuffle = " + isShuffleMap + "]" + override def toString = "Stage " + id override def hashCode(): Int = id } diff --git a/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala b/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala index fa4de15d0d..9fcef86e46 100644 --- a/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala +++ b/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala @@ -12,7 +12,7 @@ private[spark] trait TaskSchedulerListener { def taskEnded(task: Task[_], reason: TaskEndReason, result: Any, accumUpdates: Map[Long, Any]): Unit // A node was lost from the cluster. - def hostLost(host: String): Unit + def executorLost(execId: String): Unit // The TaskScheduler wants to abort an entire task set. 
def taskSetFailed(taskSet: TaskSet, reason: String): Unit diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala index a639b72795..0b4177805b 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala +++ b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala @@ -27,19 +27,20 @@ private[spark] class ClusterScheduler(val sc: SparkContext) var activeTaskSetsQueue = new ArrayBuffer[TaskSetManager] val taskIdToTaskSetId = new HashMap[Long, String] - val taskIdToSlaveId = new HashMap[Long, String] + val taskIdToExecutorId = new HashMap[Long, String] val taskSetTaskIds = new HashMap[String, HashSet[Long]] // Incrementing Mesos task IDs val nextTaskId = new AtomicLong(0) - // Which hosts in the cluster are alive (contains hostnames) - val hostsAlive = new HashSet[String] + // Which executor IDs we have executors on + val activeExecutorIds = new HashSet[String] - // Which slave IDs we have executors on - val slaveIdsWithExecutors = new HashSet[String] + // The set of executors we have on each host; this is used to compute hostsAlive, which + // in turn is used to decide when we can attain data locality on a given host + val executorsByHost = new HashMap[String, HashSet[String]] - val slaveIdToHost = new HashMap[String, String] + val executorIdToHost = new HashMap[String, String] // JAR server, if any JARs were added by the user to the SparkContext var jarServer: HttpServer = null @@ -102,7 +103,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) activeTaskSets -= manager.taskSet.id activeTaskSetsQueue -= manager taskIdToTaskSetId --= taskSetTaskIds(manager.taskSet.id) - taskIdToSlaveId --= taskSetTaskIds(manager.taskSet.id) + taskIdToExecutorId --= taskSetTaskIds(manager.taskSet.id) taskSetTaskIds.remove(manager.taskSet.id) } } @@ -117,8 +118,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) SparkEnv.set(sc.env) // Mark each slave as alive and remember its hostname for (o <- offers) { - slaveIdToHost(o.slaveId) = o.hostname - hostsAlive += o.hostname + executorIdToHost(o.executorId) = o.hostname } // Build a list of tasks to assign to each slave val tasks = offers.map(o => new ArrayBuffer[TaskDescription](o.cores)) @@ -128,16 +128,20 @@ private[spark] class ClusterScheduler(val sc: SparkContext) do { launchedTask = false for (i <- 0 until offers.size) { - val sid = offers(i).slaveId + val execId = offers(i).executorId val host = offers(i).hostname - manager.slaveOffer(sid, host, availableCpus(i)) match { + manager.slaveOffer(execId, host, availableCpus(i)) match { case Some(task) => tasks(i) += task val tid = task.taskId taskIdToTaskSetId(tid) = manager.taskSet.id taskSetTaskIds(manager.taskSet.id) += tid - taskIdToSlaveId(tid) = sid - slaveIdsWithExecutors += sid + taskIdToExecutorId(tid) = execId + activeExecutorIds += execId + if (!executorsByHost.contains(host)) { + executorsByHost(host) = new HashSet() + } + executorsByHost(host) += execId availableCpus(i) -= 1 launchedTask = true @@ -152,25 +156,21 @@ private[spark] class ClusterScheduler(val sc: SparkContext) def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { var taskSetToUpdate: Option[TaskSetManager] = None - var failedHost: Option[String] = None + var failedExecutor: Option[String] = None var taskFailed = false synchronized { try { - if (state == TaskState.LOST && taskIdToSlaveId.contains(tid)) { - // We lost the executor on this slave, so remember that it's 
gone - val slaveId = taskIdToSlaveId(tid) - val host = slaveIdToHost(slaveId) - if (hostsAlive.contains(host)) { - slaveIdsWithExecutors -= slaveId - hostsAlive -= host - activeTaskSetsQueue.foreach(_.hostLost(host)) - failedHost = Some(host) + if (state == TaskState.LOST && taskIdToExecutorId.contains(tid)) { + // We lost this entire executor, so remember that it's gone + val execId = taskIdToExecutorId(tid) + if (activeExecutorIds.contains(execId)) { + removeExecutor(execId) + failedExecutor = Some(execId) } } taskIdToTaskSetId.get(tid) match { case Some(taskSetId) => if (activeTaskSets.contains(taskSetId)) { - //activeTaskSets(taskSetId).statusUpdate(status) taskSetToUpdate = Some(activeTaskSets(taskSetId)) } if (TaskState.isFinished(state)) { @@ -178,7 +178,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) if (taskSetTaskIds.contains(taskSetId)) { taskSetTaskIds(taskSetId) -= tid } - taskIdToSlaveId.remove(tid) + taskIdToExecutorId.remove(tid) } if (state == TaskState.FAILED) { taskFailed = true @@ -190,12 +190,12 @@ private[spark] class ClusterScheduler(val sc: SparkContext) case e: Exception => logError("Exception in statusUpdate", e) } } - // Update the task set and DAGScheduler without holding a lock on this, because that can deadlock + // Update the task set and DAGScheduler without holding a lock on this, since that can deadlock if (taskSetToUpdate != None) { taskSetToUpdate.get.statusUpdate(tid, state, serializedData) } - if (failedHost != None) { - listener.hostLost(failedHost.get) + if (failedExecutor != None) { + listener.executorLost(failedExecutor.get) backend.reviveOffers() } if (taskFailed) { @@ -249,32 +249,42 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } } - def slaveLost(slaveId: String, reason: ExecutorLossReason) { - var failedHost: Option[String] = None + def executorLost(executorId: String, reason: ExecutorLossReason) { + var failedExecutor: Option[String] = None synchronized { - slaveIdToHost.get(slaveId) match { - case Some(host) => - if (hostsAlive.contains(host)) { - logError("Lost an executor on " + host + ": " + reason) - slaveIdsWithExecutors -= slaveId - hostsAlive -= host - activeTaskSetsQueue.foreach(_.hostLost(host)) - failedHost = Some(host) - } else { - // We may get multiple slaveLost() calls with different loss reasons. For example, one - // may be triggered by a dropped connection from the slave while another may be a report - // of executor termination from Mesos. We produce log messages for both so we eventually - // report the termination reason. - logError("Lost an executor on " + host + " (already removed): " + reason) - } - case None => - // We were told about a slave being lost before we could even allocate work to it - logError("Lost slave " + slaveId + " (no work assigned yet)") + if (activeExecutorIds.contains(executorId)) { + val host = executorIdToHost(executorId) + logError("Lost executor %s on %s: %s".format(executorId, host, reason)) + removeExecutor(executorId) + failedExecutor = Some(executorId) + } else { + // We may get multiple executorLost() calls with different loss reasons. For example, one + // may be triggered by a dropped connection from the slave while another may be a report + // of executor termination from Mesos. We produce log messages for both so we eventually + // report the termination reason. 
+ logError("Lost an executor " + executorId + " (already removed): " + reason) } } - if (failedHost != None) { - listener.hostLost(failedHost.get) + // Call listener.executorLost without holding the lock on this to prevent deadlock + if (failedExecutor != None) { + listener.executorLost(failedExecutor.get) backend.reviveOffers() } } + + /** Get a list of hosts that currently have executors */ + def hostsAlive: scala.collection.Set[String] = executorsByHost.keySet + + /** Remove an executor from all our data structures and mark it as lost */ + private def removeExecutor(executorId: String) { + activeExecutorIds -= executorId + val host = executorIdToHost(executorId) + val execs = executorsByHost.getOrElse(host, new HashSet) + execs -= executorId + if (execs.isEmpty) { + executorsByHost -= host + } + executorIdToHost -= executorId + activeTaskSetsQueue.foreach(_.executorLost(executorId, host)) + } } diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index 4f82cd96dd..f0792c1b76 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -37,7 +37,7 @@ private[spark] class SparkDeploySchedulerBackend( val masterUrl = "akka://spark@%s:%s/user/%s".format( System.getProperty("spark.master.host"), System.getProperty("spark.master.port"), StandaloneSchedulerBackend.ACTOR_NAME) - val args = Seq(masterUrl, "{{SLAVEID}}", "{{HOSTNAME}}", "{{CORES}}") + val args = Seq(masterUrl, "{{EXECUTOR_ID}}", "{{HOSTNAME}}", "{{CORES}}") val command = Command("spark.executor.StandaloneExecutorBackend", args, sc.executorEnvs) val sparkHome = sc.getSparkHome().getOrElse(throw new IllegalArgumentException("must supply spark home for spark standalone")) val jobDesc = new JobDescription(jobName, maxCores, executorMemory, command, sparkHome) @@ -81,7 +81,7 @@ private[spark] class SparkDeploySchedulerBackend( executorIdToSlaveId.get(id) match { case Some(slaveId) => executorIdToSlaveId.remove(id) - scheduler.slaveLost(slaveId, reason) + scheduler.executorLost(slaveId, reason) case None => logInfo("No slave ID known for executor %s".format(id)) } diff --git a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index eeaae23dc8..32be1e7a26 100644 --- a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -28,8 +28,8 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor val slaveAddress = new HashMap[String, Address] val slaveHost = new HashMap[String, String] val freeCores = new HashMap[String, Int] - val actorToSlaveId = new HashMap[ActorRef, String] - val addressToSlaveId = new HashMap[Address, String] + val actorToExecutorId = new HashMap[ActorRef, String] + val addressToExecutorId = new HashMap[Address, String] override def preStart() { // Listen for remote client disconnection events, since they don't go through Akka's watch() @@ -37,28 +37,28 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor } def receive = { - case RegisterSlave(slaveId, host, cores) => - if (slaveActor.contains(slaveId)) { - sender ! 
RegisterSlaveFailed("Duplicate slave ID: " + slaveId) + case RegisterSlave(executorId, host, cores) => + if (slaveActor.contains(executorId)) { + sender ! RegisterSlaveFailed("Duplicate executor ID: " + executorId) } else { - logInfo("Registered slave: " + sender + " with ID " + slaveId) + logInfo("Registered executor: " + sender + " with ID " + executorId) sender ! RegisteredSlave(sparkProperties) context.watch(sender) - slaveActor(slaveId) = sender - slaveHost(slaveId) = host - freeCores(slaveId) = cores - slaveAddress(slaveId) = sender.path.address - actorToSlaveId(sender) = slaveId - addressToSlaveId(sender.path.address) = slaveId + slaveActor(executorId) = sender + slaveHost(executorId) = host + freeCores(executorId) = cores + slaveAddress(executorId) = sender.path.address + actorToExecutorId(sender) = executorId + addressToExecutorId(sender.path.address) = executorId totalCoreCount.addAndGet(cores) makeOffers() } - case StatusUpdate(slaveId, taskId, state, data) => + case StatusUpdate(executorId, taskId, state, data) => scheduler.statusUpdate(taskId, state, data.value) if (TaskState.isFinished(state)) { - freeCores(slaveId) += 1 - makeOffers(slaveId) + freeCores(executorId) += 1 + makeOffers(executorId) } case ReviveOffers => @@ -69,13 +69,13 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor context.stop(self) case Terminated(actor) => - actorToSlaveId.get(actor).foreach(removeSlave(_, "Akka actor terminated")) + actorToExecutorId.get(actor).foreach(removeSlave(_, "Akka actor terminated")) case RemoteClientDisconnected(transport, address) => - addressToSlaveId.get(address).foreach(removeSlave(_, "remote Akka client disconnected")) + addressToExecutorId.get(address).foreach(removeSlave(_, "remote Akka client disconnected")) case RemoteClientShutdown(transport, address) => - addressToSlaveId.get(address).foreach(removeSlave(_, "remote Akka client shutdown")) + addressToExecutorId.get(address).foreach(removeSlave(_, "remote Akka client shutdown")) } // Make fake resource offers on all slaves @@ -85,31 +85,31 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor } // Make fake resource offers on just one slave - def makeOffers(slaveId: String) { + def makeOffers(executorId: String) { launchTasks(scheduler.resourceOffers( - Seq(new WorkerOffer(slaveId, slaveHost(slaveId), freeCores(slaveId))))) + Seq(new WorkerOffer(executorId, slaveHost(executorId), freeCores(executorId))))) } // Launch tasks returned by a set of resource offers def launchTasks(tasks: Seq[Seq[TaskDescription]]) { for (task <- tasks.flatten) { - freeCores(task.slaveId) -= 1 - slaveActor(task.slaveId) ! LaunchTask(task) + freeCores(task.executorId) -= 1 + slaveActor(task.executorId) ! 
LaunchTask(task) } } // Remove a disconnected slave from the cluster - def removeSlave(slaveId: String, reason: String) { - logInfo("Slave " + slaveId + " disconnected, so removing it") - val numCores = freeCores(slaveId) - actorToSlaveId -= slaveActor(slaveId) - addressToSlaveId -= slaveAddress(slaveId) - slaveActor -= slaveId - slaveHost -= slaveId - freeCores -= slaveId - slaveHost -= slaveId + def removeSlave(executorId: String, reason: String) { + logInfo("Slave " + executorId + " disconnected, so removing it") + val numCores = freeCores(executorId) + actorToExecutorId -= slaveActor(executorId) + addressToExecutorId -= slaveAddress(executorId) + slaveActor -= executorId + slaveHost -= executorId + freeCores -= executorId + slaveHost -= executorId totalCoreCount.addAndGet(-numCores) - scheduler.slaveLost(slaveId, SlaveLost(reason)) + scheduler.executorLost(executorId, SlaveLost(reason)) } } diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala b/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala index aa097fd3a2..b41e951be9 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala +++ b/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala @@ -5,7 +5,7 @@ import spark.util.SerializableBuffer private[spark] class TaskDescription( val taskId: Long, - val slaveId: String, + val executorId: String, val name: String, _serializedTask: ByteBuffer) extends Serializable { diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala b/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala index ca84503780..0f975ce1eb 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala +++ b/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala @@ -4,7 +4,12 @@ package spark.scheduler.cluster * Information about a running task attempt inside a TaskSet. */ private[spark] -class TaskInfo(val taskId: Long, val index: Int, val launchTime: Long, val host: String) { +class TaskInfo( + val taskId: Long, + val index: Int, + val launchTime: Long, + val executorId: String, + val host: String) { var finishTime: Long = 0 var failed = false diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala index a089b71644..26201ad0dd 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala @@ -138,10 +138,11 @@ private[spark] class TaskSetManager( // attempt running on this host, in case the host is slow. In addition, if localOnly is set, the // task must have a preference for this host (or no preferred locations at all). 
def findSpeculativeTask(host: String, localOnly: Boolean): Option[Int] = { + val hostsAlive = sched.hostsAlive speculatableTasks.retain(index => !finished(index)) // Remove finished tasks from set val localTask = speculatableTasks.find { index => - val locations = tasks(index).preferredLocations.toSet & sched.hostsAlive + val locations = tasks(index).preferredLocations.toSet & hostsAlive val attemptLocs = taskAttempts(index).map(_.host) (locations.size == 0 || locations.contains(host)) && !attemptLocs.contains(host) } @@ -189,7 +190,7 @@ private[spark] class TaskSetManager( } // Respond to an offer of a single slave from the scheduler by finding a task - def slaveOffer(slaveId: String, host: String, availableCpus: Double): Option[TaskDescription] = { + def slaveOffer(execId: String, host: String, availableCpus: Double): Option[TaskDescription] = { if (tasksFinished < numTasks && availableCpus >= CPUS_PER_TASK) { val time = System.currentTimeMillis val localOnly = (time - lastPreferredLaunchTime < LOCALITY_WAIT) @@ -206,11 +207,11 @@ private[spark] class TaskSetManager( } else { "non-preferred, not one of " + task.preferredLocations.mkString(", ") } - logInfo("Starting task %s:%d as TID %s on slave %s: %s (%s)".format( - taskSet.id, index, taskId, slaveId, host, prefStr)) + logInfo("Starting task %s:%d as TID %s on executor %s: %s (%s)".format( + taskSet.id, index, taskId, execId, host, prefStr)) // Do various bookkeeping copiesRunning(index) += 1 - val info = new TaskInfo(taskId, index, time, host) + val info = new TaskInfo(taskId, index, time, execId, host) taskInfos(taskId) = info taskAttempts(index) = info :: taskAttempts(index) if (preferred) { @@ -224,7 +225,7 @@ private[spark] class TaskSetManager( logInfo("Serialized task %s:%d as %d bytes in %d ms".format( taskSet.id, index, serializedTask.limit, timeTaken)) val taskName = "task %s:%d".format(taskSet.id, index) - return Some(new TaskDescription(taskId, slaveId, taskName, serializedTask)) + return Some(new TaskDescription(taskId, execId, taskName, serializedTask)) } case _ => } @@ -356,19 +357,22 @@ private[spark] class TaskSetManager( sched.taskSetFinished(this) } - def hostLost(hostname: String) { - logInfo("Re-queueing tasks for " + hostname + " from TaskSet " + taskSet.id) - // If some task has preferred locations only on hostname, put it in the no-prefs list - // to avoid the wait from delay scheduling - for (index <- getPendingTasksForHost(hostname)) { - val newLocs = tasks(index).preferredLocations.toSet & sched.hostsAlive - if (newLocs.isEmpty) { - pendingTasksWithNoPrefs += index + def executorLost(execId: String, hostname: String) { + logInfo("Re-queueing tasks for " + execId + " from TaskSet " + taskSet.id) + val newHostsAlive = sched.hostsAlive + // If some task has preferred locations only on hostname, and there are no more executors there, + // put it in the no-prefs list to avoid the wait from delay scheduling + if (!newHostsAlive.contains(hostname)) { + for (index <- getPendingTasksForHost(hostname)) { + val newLocs = tasks(index).preferredLocations.toSet & newHostsAlive + if (newLocs.isEmpty) { + pendingTasksWithNoPrefs += index + } } } - // Re-enqueue any tasks that ran on the failed host if this is a shuffle map stage + // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage if (tasks(0).isInstanceOf[ShuffleMapTask]) { - for ((tid, info) <- taskInfos if info.host == hostname) { + for ((tid, info) <- taskInfos if info.executorId == execId) { val index = taskInfos(tid).index if 
(finished(index)) { finished(index) = false @@ -382,7 +386,7 @@ private[spark] class TaskSetManager( } } // Also re-enqueue any tasks that were running on the node - for ((tid, info) <- taskInfos if info.running && info.host == hostname) { + for ((tid, info) <- taskInfos if info.running && info.executorId == execId) { taskLost(tid, TaskState.KILLED, null) } } diff --git a/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala b/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala index 6b919d68b2..3c3afcbb14 100644 --- a/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala +++ b/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala @@ -1,8 +1,8 @@ package spark.scheduler.cluster /** - * Represents free resources available on a worker node. + * Represents free resources available on an executor. */ private[spark] -class WorkerOffer(val slaveId: String, val hostname: String, val cores: Int) { +class WorkerOffer(val executorId: String, val hostname: String, val cores: Int) { } diff --git a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala index 2989e31f5e..f3467db86b 100644 --- a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala @@ -268,7 +268,7 @@ private[spark] class MesosSchedulerBackend( synchronized { slaveIdsWithExecutors -= slaveId.getValue } - scheduler.slaveLost(slaveId.getValue, reason) + scheduler.executorLost(slaveId.getValue, reason) } override def slaveLost(d: SchedulerDriver, slaveId: SlaveID) { diff --git a/core/src/main/scala/spark/storage/BlockManager.scala b/core/src/main/scala/spark/storage/BlockManager.scala index 19d35b8667..1215d5f5c8 100644 --- a/core/src/main/scala/spark/storage/BlockManager.scala +++ b/core/src/main/scala/spark/storage/BlockManager.scala @@ -30,6 +30,7 @@ extends Exception(message) private[spark] class BlockManager( + executorId: String, actorSystem: ActorSystem, val master: BlockManagerMaster, val serializer: Serializer, @@ -68,8 +69,8 @@ class BlockManager( val connectionManager = new ConnectionManager(0) implicit val futureExecContext = connectionManager.futureExecContext - val connectionManagerId = connectionManager.id - val blockManagerId = BlockManagerId(connectionManagerId.host, connectionManagerId.port) + val blockManagerId = BlockManagerId( + executorId, connectionManager.id.host, connectionManager.id.port) // Max megabytes of data to keep in flight per reducer (to avoid over-allocating memory // for receiving shuffle outputs) @@ -109,8 +110,9 @@ class BlockManager( /** * Construct a BlockManager with a memory limit set based on system properties. 
*/ - def this(actorSystem: ActorSystem, master: BlockManagerMaster, serializer: Serializer) = { - this(actorSystem, master, serializer, BlockManager.getMaxMemoryFromSystemProperties) + def this(execId: String, actorSystem: ActorSystem, master: BlockManagerMaster, + serializer: Serializer) = { + this(execId, actorSystem, master, serializer, BlockManager.getMaxMemoryFromSystemProperties) } /** diff --git a/core/src/main/scala/spark/storage/BlockManagerId.scala b/core/src/main/scala/spark/storage/BlockManagerId.scala index abb8b45a1f..f2f1e77d41 100644 --- a/core/src/main/scala/spark/storage/BlockManagerId.scala +++ b/core/src/main/scala/spark/storage/BlockManagerId.scala @@ -7,27 +7,32 @@ import java.util.concurrent.ConcurrentHashMap * This class represent an unique identifier for a BlockManager. * The first 2 constructors of this class is made private to ensure that * BlockManagerId objects can be created only using the factory method in - * [[spark.storage.BlockManager$]]. This allows de-duplication of id objects. + * [[spark.storage.BlockManager$]]. This allows de-duplication of ID objects. * Also, constructor parameters are private to ensure that parameters cannot * be modified from outside this class. */ private[spark] class BlockManagerId private ( + private var executorId_ : String, private var ip_ : String, private var port_ : Int ) extends Externalizable { - private def this() = this(null, 0) // For deserialization only + private def this() = this(null, null, 0) // For deserialization only - def ip = ip_ + def executorId: String = executorId_ - def port = port_ + def ip: String = ip_ + + def port: Int = port_ override def writeExternal(out: ObjectOutput) { + out.writeUTF(executorId_) out.writeUTF(ip_) out.writeInt(port_) } override def readExternal(in: ObjectInput) { + executorId_ = in.readUTF() ip_ = in.readUTF() port_ = in.readInt() } @@ -35,21 +40,23 @@ private[spark] class BlockManagerId private ( @throws(classOf[IOException]) private def readResolve(): Object = BlockManagerId.getCachedBlockManagerId(this) - override def toString = "BlockManagerId(" + ip + ", " + port + ")" + override def toString = "BlockManagerId(%s, %s, %d)".format(executorId, ip, port) - override def hashCode = ip.hashCode * 41 + port + override def hashCode: Int = (executorId.hashCode * 41 + ip.hashCode) * 41 + port override def equals(that: Any) = that match { - case id: BlockManagerId => port == id.port && ip == id.ip - case _ => false + case id: BlockManagerId => + executorId == id.executorId && port == id.port && ip == id.ip + case _ => + false } } private[spark] object BlockManagerId { - def apply(ip: String, port: Int) = - getCachedBlockManagerId(new BlockManagerId(ip, port)) + def apply(execId: String, ip: String, port: Int) = + getCachedBlockManagerId(new BlockManagerId(execId, ip, port)) def apply(in: ObjectInput) = { val obj = new BlockManagerId() diff --git a/core/src/main/scala/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/spark/storage/BlockManagerMaster.scala index 937115e92c..55ff1dde9c 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMaster.scala @@ -24,7 +24,7 @@ private[spark] class BlockManagerMaster( masterPort: Int) extends Logging { - val AKKA_RETRY_ATTEMPS: Int = System.getProperty("spark.akka.num.retries", "3").toInt + val AKKA_RETRY_ATTEMPTS: Int = System.getProperty("spark.akka.num.retries", "3").toInt val AKKA_RETRY_INTERVAL_MS: Int = System.getProperty("spark.akka.retry.wait", "3000").toInt 
val MASTER_AKKA_ACTOR_NAME = "BlockMasterManager" @@ -45,10 +45,10 @@ private[spark] class BlockManagerMaster( } } - /** Remove a dead host from the master actor. This is only called on the master side. */ - def notifyADeadHost(host: String) { - tell(RemoveHost(host)) - logInfo("Removed " + host + " successfully in notifyADeadHost") + /** Remove a dead executor from the master actor. This is only called on the master side. */ + def removeExecutor(execId: String) { + tell(RemoveExecutor(execId)) + logInfo("Removed " + execId + " successfully in removeExecutor") } /** @@ -146,7 +146,7 @@ private[spark] class BlockManagerMaster( } var attempts = 0 var lastException: Exception = null - while (attempts < AKKA_RETRY_ATTEMPS) { + while (attempts < AKKA_RETRY_ATTEMPTS) { attempts += 1 try { val future = masterActor.ask(message)(timeout) diff --git a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala b/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala index b31b6286d3..f88517f1a3 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala @@ -23,9 +23,8 @@ class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { private val blockManagerInfo = new HashMap[BlockManagerId, BlockManagerMasterActor.BlockManagerInfo] - // Mapping from host name to block manager id. We allow multiple block managers - // on the same host name (ip). - private val blockManagerIdByHost = new HashMap[String, ArrayBuffer[BlockManagerId]] + // Mapping from executor ID to block manager ID. + private val blockManagerIdByExecutor = new HashMap[String, BlockManagerId] // Mapping from block id to the set of block managers that have the block. private val blockLocations = new JHashMap[String, Pair[Int, HashSet[BlockManagerId]]] @@ -74,8 +73,8 @@ class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { case RemoveBlock(blockId) => removeBlock(blockId) - case RemoveHost(host) => - removeHost(host) + case RemoveExecutor(execId) => + removeExecutor(execId) sender ! true case StopBlockManagerMaster => @@ -99,16 +98,12 @@ class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { def removeBlockManager(blockManagerId: BlockManagerId) { val info = blockManagerInfo(blockManagerId) - // Remove the block manager from blockManagerIdByHost. If the list of block - // managers belonging to the IP is empty, remove the entry from the hash map. - blockManagerIdByHost.get(blockManagerId.ip).foreach { managers: ArrayBuffer[BlockManagerId] => - managers -= blockManagerId - if (managers.size == 0) blockManagerIdByHost.remove(blockManagerId.ip) - } + // Remove the block manager from blockManagerIdByExecutor. + blockManagerIdByExecutor -= blockManagerId.executorId // Remove it from blockManagerInfo and remove all the blocks. 
blockManagerInfo.remove(blockManagerId) - var iterator = info.blocks.keySet.iterator + val iterator = info.blocks.keySet.iterator while (iterator.hasNext) { val blockId = iterator.next val locations = blockLocations.get(blockId)._2 @@ -133,17 +128,15 @@ class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { toRemove.foreach(removeBlockManager) } - def removeHost(host: String) { - logInfo("Trying to remove the host: " + host + " from BlockManagerMaster.") - logInfo("Previous hosts: " + blockManagerInfo.keySet.toSeq) - blockManagerIdByHost.get(host).foreach(_.foreach(removeBlockManager)) - logInfo("Current hosts: " + blockManagerInfo.keySet.toSeq) + def removeExecutor(execId: String) { + logInfo("Trying to remove executor " + execId + " from BlockManagerMaster.") + blockManagerIdByExecutor.get(execId).foreach(removeBlockManager) sender ! true } def heartBeat(blockManagerId: BlockManagerId) { if (!blockManagerInfo.contains(blockManagerId)) { - if (blockManagerId.ip == Utils.localHostName() && !isLocal) { + if (blockManagerId.executorId == "" && !isLocal) { sender ! true } else { sender ! false @@ -188,24 +181,20 @@ class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { sender ! res } - private def register(blockManagerId: BlockManagerId, maxMemSize: Long, slaveActor: ActorRef) { - val startTimeMs = System.currentTimeMillis() - val tmp = " " + blockManagerId + " " - - if (blockManagerId.ip == Utils.localHostName() && !isLocal) { - logInfo("Got Register Msg from master node, don't register it") - } else if (!blockManagerInfo.contains(blockManagerId)) { - blockManagerIdByHost.get(blockManagerId.ip) match { - case Some(managers) => - // A block manager of the same host name already exists. - logInfo("Got another registration for host " + blockManagerId) - managers += blockManagerId + private def register(id: BlockManagerId, maxMemSize: Long, slaveActor: ActorRef) { + if (id.executorId == "" && !isLocal) { + // Got a register message from the master node; don't register it + } else if (!blockManagerInfo.contains(id)) { + blockManagerIdByExecutor.get(id.executorId) match { + case Some(manager) => + // A block manager of the same host name already exists + logError("Got two different block manager registrations on " + id.executorId) + System.exit(1) case None => - blockManagerIdByHost += (blockManagerId.ip -> ArrayBuffer(blockManagerId)) + blockManagerIdByExecutor(id.executorId) = id } - - blockManagerInfo += (blockManagerId -> new BlockManagerMasterActor.BlockManagerInfo( - blockManagerId, System.currentTimeMillis(), maxMemSize, slaveActor)) + blockManagerInfo(id) = new BlockManagerMasterActor.BlockManagerInfo( + id, System.currentTimeMillis(), maxMemSize, slaveActor) } sender ! true } @@ -217,11 +206,8 @@ class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { memSize: Long, diskSize: Long) { - val startTimeMs = System.currentTimeMillis() - val tmp = " " + blockManagerId + " " + blockId + " " - if (!blockManagerInfo.contains(blockManagerId)) { - if (blockManagerId.ip == Utils.localHostName() && !isLocal) { + if (blockManagerId.executorId == "" && !isLocal) { // We intentionally do not register the master (except in local mode), // so we should not indicate failure. sender ! 
true @@ -353,8 +339,8 @@ object BlockManagerMasterActor { _lastSeenMs = System.currentTimeMillis() } - def updateBlockInfo(blockId: String, storageLevel: StorageLevel, memSize: Long, diskSize: Long) - : Unit = synchronized { + def updateBlockInfo(blockId: String, storageLevel: StorageLevel, memSize: Long, + diskSize: Long) { updateLastSeenMs() diff --git a/core/src/main/scala/spark/storage/BlockManagerMessages.scala b/core/src/main/scala/spark/storage/BlockManagerMessages.scala index 3d03ff3a93..1494f90103 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMessages.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMessages.scala @@ -88,7 +88,7 @@ private[spark] case class GetPeers(blockManagerId: BlockManagerId, size: Int) extends ToBlockManagerMaster private[spark] -case class RemoveHost(host: String) extends ToBlockManagerMaster +case class RemoveExecutor(execId: String) extends ToBlockManagerMaster private[spark] case object StopBlockManagerMaster extends ToBlockManagerMaster diff --git a/core/src/main/scala/spark/storage/BlockManagerUI.scala b/core/src/main/scala/spark/storage/BlockManagerUI.scala index 1003cc7a61..b7423c7234 100644 --- a/core/src/main/scala/spark/storage/BlockManagerUI.scala +++ b/core/src/main/scala/spark/storage/BlockManagerUI.scala @@ -11,6 +11,7 @@ import cc.spray.typeconversion.TwirlSupport._ import scala.collection.mutable.ArrayBuffer import spark.{Logging, SparkContext, SparkEnv} import spark.util.AkkaUtils +import spark.Utils private[spark] @@ -20,10 +21,10 @@ object BlockManagerUI extends Logging { def start(actorSystem : ActorSystem, masterActor: ActorRef, sc: SparkContext) { val webUIDirectives = new BlockManagerUIDirectives(actorSystem, masterActor, sc) try { - logInfo("Starting BlockManager WebUI.") - val port = Option(System.getenv("BLOCKMANAGER_UI_PORT")).getOrElse("9080").toInt - AkkaUtils.startSprayServer(actorSystem, "0.0.0.0", port, + val boundPort = AkkaUtils.startSprayServer(actorSystem, "0.0.0.0", + Option(System.getenv("BLOCKMANAGER_UI_PORT")).getOrElse("9080").toInt, webUIDirectives.handler, "BlockManagerHTTPServer") + logInfo("Started BlockManager web UI at %s:%d".format(Utils.localHostName(), boundPort)) } catch { case e: Exception => logError("Failed to create BlockManager WebUI", e) diff --git a/core/src/main/scala/spark/storage/ThreadingTest.scala b/core/src/main/scala/spark/storage/ThreadingTest.scala index 689f07b969..f04c046c31 100644 --- a/core/src/main/scala/spark/storage/ThreadingTest.scala +++ b/core/src/main/scala/spark/storage/ThreadingTest.scala @@ -78,7 +78,8 @@ private[spark] object ThreadingTest { val masterIp: String = System.getProperty("spark.master.host", "localhost") val masterPort: Int = System.getProperty("spark.master.port", "7077").toInt val blockManagerMaster = new BlockManagerMaster(actorSystem, true, true, masterIp, masterPort) - val blockManager = new BlockManager(actorSystem, blockManagerMaster, serializer, 1024 * 1024) + val blockManager = new BlockManager( + "", actorSystem, blockManagerMaster, serializer, 1024 * 1024) val producers = (1 to numProducers).map(i => new ProducerThread(blockManager, i)) val consumers = producers.map(p => new ConsumerThread(blockManager, p.queue)) producers.foreach(_.start) diff --git a/core/src/main/scala/spark/util/AkkaUtils.scala b/core/src/main/scala/spark/util/AkkaUtils.scala index ff2c3079be..775ff8f1aa 100644 --- a/core/src/main/scala/spark/util/AkkaUtils.scala +++ b/core/src/main/scala/spark/util/AkkaUtils.scala @@ -52,10 +52,10 @@ private[spark] object 
AkkaUtils { /** * Creates a Spray HTTP server bound to a given IP and port with a given Spray Route object to - * handle requests. Throws a SparkException if this fails. + * handle requests. Returns the bound port or throws a SparkException on failure. */ def startSprayServer(actorSystem: ActorSystem, ip: String, port: Int, route: Route, - name: String = "HttpServer") { + name: String = "HttpServer"): Int = { val ioWorker = new IoWorker(actorSystem).start() val httpService = actorSystem.actorOf(Props(new HttpService(route))) val rootService = actorSystem.actorOf(Props(new SprayCanRootService(httpService))) @@ -67,7 +67,7 @@ private[spark] object AkkaUtils { try { Await.result(future, timeout) match { case bound: HttpServer.Bound => - return + return bound.endpoint.getPort case other: Any => throw new SparkException("Failed to bind web UI to port " + port + ": " + other) } diff --git a/core/src/main/scala/spark/util/TimeStampedHashMap.scala b/core/src/main/scala/spark/util/TimeStampedHashMap.scala index bb7c5c01c8..188f8910da 100644 --- a/core/src/main/scala/spark/util/TimeStampedHashMap.scala +++ b/core/src/main/scala/spark/util/TimeStampedHashMap.scala @@ -63,9 +63,9 @@ class TimeStampedHashMap[A, B] extends Map[A, B]() with spark.Logging { override def empty: Map[A, B] = new TimeStampedHashMap[A, B]() - override def size(): Int = internalMap.size() + override def size: Int = internalMap.size - override def foreach[U](f: ((A, B)) => U): Unit = { + override def foreach[U](f: ((A, B)) => U) { val iterator = internalMap.entrySet().iterator() while(iterator.hasNext) { val entry = iterator.next() diff --git a/core/src/test/scala/spark/DriverSuite.scala b/core/src/test/scala/spark/DriverSuite.scala index 70a7c8bc2f..342610e1dd 100644 --- a/core/src/test/scala/spark/DriverSuite.scala +++ b/core/src/test/scala/spark/DriverSuite.scala @@ -13,7 +13,8 @@ class DriverSuite extends FunSuite with Timeouts { val masters = Table(("master"), ("local"), ("local-cluster[2,1,512]")) forAll(masters) { (master: String) => failAfter(10 seconds) { - Utils.execute(Seq("./run", "spark.DriverWithoutCleanup", master), new File(System.getenv("SPARK_HOME"))) + Utils.execute(Seq("./run", "spark.DriverWithoutCleanup", master), + new File(System.getenv("SPARK_HOME"))) } } } @@ -28,4 +29,4 @@ object DriverWithoutCleanup { val sc = new SparkContext(args(0), "DriverWithoutCleanup") sc.parallelize(1 to 100, 4).count() } -} \ No newline at end of file +} diff --git a/core/src/test/scala/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/spark/MapOutputTrackerSuite.scala index 7d5305f1e0..e8fe7ecabc 100644 --- a/core/src/test/scala/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/spark/MapOutputTrackerSuite.scala @@ -43,13 +43,13 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext { val compressedSize10000 = MapOutputTracker.compressSize(10000L) val size1000 = MapOutputTracker.decompressSize(compressedSize1000) val size10000 = MapOutputTracker.decompressSize(compressedSize10000) - tracker.registerMapOutput(10, 0, new MapStatus(BlockManagerId("hostA", 1000), + tracker.registerMapOutput(10, 0, new MapStatus(BlockManagerId("a", "hostA", 1000), Array(compressedSize1000, compressedSize10000))) - tracker.registerMapOutput(10, 1, new MapStatus(BlockManagerId("hostB", 1000), + tracker.registerMapOutput(10, 1, new MapStatus(BlockManagerId("b", "hostB", 1000), Array(compressedSize10000, compressedSize1000))) val statuses = tracker.getServerStatuses(10, 0) - assert(statuses.toSeq === 
Seq((BlockManagerId("hostA", 1000), size1000), - (BlockManagerId("hostB", 1000), size10000))) + assert(statuses.toSeq === Seq((BlockManagerId("a", "hostA", 1000), size1000), + (BlockManagerId("b", "hostB", 1000), size10000))) tracker.stop() } @@ -61,47 +61,52 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext { val compressedSize10000 = MapOutputTracker.compressSize(10000L) val size1000 = MapOutputTracker.decompressSize(compressedSize1000) val size10000 = MapOutputTracker.decompressSize(compressedSize10000) - tracker.registerMapOutput(10, 0, new MapStatus(BlockManagerId("hostA", 1000), + tracker.registerMapOutput(10, 0, new MapStatus(BlockManagerId("a", "hostA", 1000), Array(compressedSize1000, compressedSize1000, compressedSize1000))) - tracker.registerMapOutput(10, 1, new MapStatus(BlockManagerId("hostB", 1000), + tracker.registerMapOutput(10, 1, new MapStatus(BlockManagerId("b", "hostB", 1000), Array(compressedSize10000, compressedSize1000, compressedSize1000))) // As if we had two simulatenous fetch failures - tracker.unregisterMapOutput(10, 0, BlockManagerId("hostA", 1000)) - tracker.unregisterMapOutput(10, 0, BlockManagerId("hostA", 1000)) + tracker.unregisterMapOutput(10, 0, BlockManagerId("a", "hostA", 1000)) + tracker.unregisterMapOutput(10, 0, BlockManagerId("a", "hostA", 1000)) - // The remaining reduce task might try to grab the output dispite the shuffle failure; + // The remaining reduce task might try to grab the output despite the shuffle failure; // this should cause it to fail, and the scheduler will ignore the failure due to the // stage already being aborted. intercept[FetchFailedException] { tracker.getServerStatuses(10, 1) } } test("remote fetch") { - val (actorSystem, boundPort) = - AkkaUtils.createActorSystem("test", "localhost", 0) - System.setProperty("spark.master.port", boundPort.toString) - val masterTracker = new MapOutputTracker(actorSystem, true) - val slaveTracker = new MapOutputTracker(actorSystem, false) - masterTracker.registerShuffle(10, 1) - masterTracker.incrementGeneration() - slaveTracker.updateGeneration(masterTracker.getGeneration) - intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } + try { + System.clearProperty("spark.master.host") // In case some previous test had set it + val (actorSystem, boundPort) = + AkkaUtils.createActorSystem("test", "localhost", 0) + System.setProperty("spark.master.port", boundPort.toString) + val masterTracker = new MapOutputTracker(actorSystem, true) + val slaveTracker = new MapOutputTracker(actorSystem, false) + masterTracker.registerShuffle(10, 1) + masterTracker.incrementGeneration() + slaveTracker.updateGeneration(masterTracker.getGeneration) + intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } - val compressedSize1000 = MapOutputTracker.compressSize(1000L) - val size1000 = MapOutputTracker.decompressSize(compressedSize1000) - masterTracker.registerMapOutput(10, 0, new MapStatus( - BlockManagerId("hostA", 1000), Array(compressedSize1000))) - masterTracker.incrementGeneration() - slaveTracker.updateGeneration(masterTracker.getGeneration) - assert(slaveTracker.getServerStatuses(10, 0).toSeq === - Seq((BlockManagerId("hostA", 1000), size1000))) + val compressedSize1000 = MapOutputTracker.compressSize(1000L) + val size1000 = MapOutputTracker.decompressSize(compressedSize1000) + masterTracker.registerMapOutput(10, 0, new MapStatus( + BlockManagerId("a", "hostA", 1000), Array(compressedSize1000))) + masterTracker.incrementGeneration() + 
slaveTracker.updateGeneration(masterTracker.getGeneration) + assert(slaveTracker.getServerStatuses(10, 0).toSeq === + Seq((BlockManagerId("a", "hostA", 1000), size1000))) - masterTracker.unregisterMapOutput(10, 0, BlockManagerId("hostA", 1000)) - masterTracker.incrementGeneration() - slaveTracker.updateGeneration(masterTracker.getGeneration) - intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } + masterTracker.unregisterMapOutput(10, 0, BlockManagerId("a", "hostA", 1000)) + masterTracker.incrementGeneration() + slaveTracker.updateGeneration(masterTracker.getGeneration) + intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } - // failure should be cached - intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } + // failure should be cached + intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } + } finally { + System.clearProperty("spark.master.port") + } } } diff --git a/core/src/test/scala/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/spark/storage/BlockManagerSuite.scala index 2165744689..2d177bbf67 100644 --- a/core/src/test/scala/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/spark/storage/BlockManagerSuite.scala @@ -86,9 +86,9 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("BlockManagerId object caching") { - val id1 = BlockManagerId("XXX", 1) - val id2 = BlockManagerId("XXX", 1) // this should return the same object as id1 - val id3 = BlockManagerId("XXX", 2) // this should return a different object + val id1 = BlockManagerId("e1", "XXX", 1) + val id2 = BlockManagerId("e1", "XXX", 1) // this should return the same object as id1 + val id3 = BlockManagerId("e1", "XXX", 2) // this should return a different object assert(id2 === id1, "id2 is not same as id1") assert(id2.eq(id1), "id2 is not the same object as id1") assert(id3 != id1, "id3 is same as id1") @@ -103,7 +103,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("master + 1 manager interaction") { - store = new BlockManager(actorSystem, master, serializer, 2000) + store = new BlockManager("", actorSystem, master, serializer, 2000) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -133,8 +133,8 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("master + 2 managers interaction") { - store = new BlockManager(actorSystem, master, serializer, 2000) - store2 = new BlockManager(actorSystem, master, new KryoSerializer, 2000) + store = new BlockManager("exec1", actorSystem, master, serializer, 2000) + store2 = new BlockManager("exec2", actorSystem, master, new KryoSerializer, 2000) val peers = master.getPeers(store.blockManagerId, 1) assert(peers.size === 1, "master did not return the other manager as a peer") @@ -149,7 +149,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("removing block") { - store = new BlockManager(actorSystem, master, serializer, 2000) + store = new BlockManager("", actorSystem, master, serializer, 2000) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -198,7 +198,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT test("reregistration on heart beat") { val heartBeat = PrivateMethod[Unit]('heartBeat) - store = new BlockManager(actorSystem, master, serializer, 2000) + store = new BlockManager("", actorSystem, master, 
serializer, 2000) val a1 = new Array[Byte](400) store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY) @@ -206,7 +206,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT assert(store.getSingle("a1") != None, "a1 was not in store") assert(master.getLocations("a1").size > 0, "master was not told about a1") - master.notifyADeadHost(store.blockManagerId.ip) + master.removeExecutor(store.blockManagerId.executorId) assert(master.getLocations("a1").size == 0, "a1 was not removed from master") store invokePrivate heartBeat() @@ -214,14 +214,14 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("reregistration on block update") { - store = new BlockManager(actorSystem, master, serializer, 2000) + store = new BlockManager("", actorSystem, master, serializer, 2000) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY) assert(master.getLocations("a1").size > 0, "master was not told about a1") - master.notifyADeadHost(store.blockManagerId.ip) + master.removeExecutor(store.blockManagerId.executorId) assert(master.getLocations("a1").size == 0, "a1 was not removed from master") store.putSingle("a2", a1, StorageLevel.MEMORY_ONLY) @@ -233,35 +233,35 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT test("reregistration doesn't dead lock") { val heartBeat = PrivateMethod[Unit]('heartBeat) - store = new BlockManager(actorSystem, master, serializer, 2000) + store = new BlockManager("", actorSystem, master, serializer, 2000) val a1 = new Array[Byte](400) val a2 = List(new Array[Byte](400)) // try many times to trigger any deadlocks for (i <- 1 to 100) { - master.notifyADeadHost(store.blockManagerId.ip) + master.removeExecutor(store.blockManagerId.executorId) val t1 = new Thread { - override def run = { + override def run() { store.put("a2", a2.iterator, StorageLevel.MEMORY_ONLY, true) } } val t2 = new Thread { - override def run = { + override def run() { store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY) } } val t3 = new Thread { - override def run = { + override def run() { store invokePrivate heartBeat() } } - t1.start - t2.start - t3.start - t1.join - t2.join - t3.join + t1.start() + t2.start() + t3.start() + t1.join() + t2.join() + t3.join() store.dropFromMemory("a1", null) store.dropFromMemory("a2", null) @@ -270,7 +270,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("in-memory LRU storage") { - store = new BlockManager(actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -289,7 +289,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("in-memory LRU storage with serialization") { - store = new BlockManager(actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -308,14 +308,14 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("in-memory LRU for partitions of same RDD") { - store = new BlockManager(actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) store.putSingle("rdd_0_1", a1, 
StorageLevel.MEMORY_ONLY) store.putSingle("rdd_0_2", a2, StorageLevel.MEMORY_ONLY) store.putSingle("rdd_0_3", a3, StorageLevel.MEMORY_ONLY) - // Even though we accessed rdd_0_3 last, it should not have replaced partitiosn 1 and 2 + // Even though we accessed rdd_0_3 last, it should not have replaced partitions 1 and 2 // from the same RDD assert(store.getSingle("rdd_0_3") === None, "rdd_0_3 was in store") assert(store.getSingle("rdd_0_2") != None, "rdd_0_2 was not in store") @@ -327,7 +327,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("in-memory LRU for partitions of multiple RDDs") { - store = new BlockManager(actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200) store.putSingle("rdd_0_1", new Array[Byte](400), StorageLevel.MEMORY_ONLY) store.putSingle("rdd_0_2", new Array[Byte](400), StorageLevel.MEMORY_ONLY) store.putSingle("rdd_1_1", new Array[Byte](400), StorageLevel.MEMORY_ONLY) @@ -350,7 +350,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("on-disk storage") { - store = new BlockManager(actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -363,7 +363,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("disk and memory storage") { - store = new BlockManager(actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -378,7 +378,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("disk and memory storage with getLocalBytes") { - store = new BlockManager(actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -393,7 +393,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("disk and memory storage with serialization") { - store = new BlockManager(actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -408,7 +408,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("disk and memory storage with serialization and getLocalBytes") { - store = new BlockManager(actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -423,7 +423,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("LRU with mixed storage levels") { - store = new BlockManager(actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -448,7 +448,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("in-memory LRU with streams") { - store = new BlockManager(actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200) val list1 = List(new 
Array[Byte](200), new Array[Byte](200)) val list2 = List(new Array[Byte](200), new Array[Byte](200)) val list3 = List(new Array[Byte](200), new Array[Byte](200)) @@ -472,7 +472,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("LRU with mixed storage levels and streams") { - store = new BlockManager(actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200) val list1 = List(new Array[Byte](200), new Array[Byte](200)) val list2 = List(new Array[Byte](200), new Array[Byte](200)) val list3 = List(new Array[Byte](200), new Array[Byte](200)) @@ -518,7 +518,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("overly large block") { - store = new BlockManager(actorSystem, master, serializer, 500) + store = new BlockManager("", actorSystem, master, serializer, 500) store.putSingle("a1", new Array[Byte](1000), StorageLevel.MEMORY_ONLY) assert(store.getSingle("a1") === None, "a1 was in store") store.putSingle("a2", new Array[Byte](1000), StorageLevel.MEMORY_AND_DISK) @@ -529,49 +529,49 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT test("block compression") { try { System.setProperty("spark.shuffle.compress", "true") - store = new BlockManager(actorSystem, master, serializer, 2000) + store = new BlockManager("exec1", actorSystem, master, serializer, 2000) store.putSingle("shuffle_0_0_0", new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER) assert(store.memoryStore.getSize("shuffle_0_0_0") <= 100, "shuffle_0_0_0 was not compressed") store.stop() store = null System.setProperty("spark.shuffle.compress", "false") - store = new BlockManager(actorSystem, master, serializer, 2000) + store = new BlockManager("exec2", actorSystem, master, serializer, 2000) store.putSingle("shuffle_0_0_0", new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER) assert(store.memoryStore.getSize("shuffle_0_0_0") >= 1000, "shuffle_0_0_0 was compressed") store.stop() store = null System.setProperty("spark.broadcast.compress", "true") - store = new BlockManager(actorSystem, master, serializer, 2000) + store = new BlockManager("exec3", actorSystem, master, serializer, 2000) store.putSingle("broadcast_0", new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER) assert(store.memoryStore.getSize("broadcast_0") <= 100, "broadcast_0 was not compressed") store.stop() store = null System.setProperty("spark.broadcast.compress", "false") - store = new BlockManager(actorSystem, master, serializer, 2000) + store = new BlockManager("exec4", actorSystem, master, serializer, 2000) store.putSingle("broadcast_0", new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER) assert(store.memoryStore.getSize("broadcast_0") >= 1000, "broadcast_0 was compressed") store.stop() store = null System.setProperty("spark.rdd.compress", "true") - store = new BlockManager(actorSystem, master, serializer, 2000) + store = new BlockManager("exec5", actorSystem, master, serializer, 2000) store.putSingle("rdd_0_0", new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER) assert(store.memoryStore.getSize("rdd_0_0") <= 100, "rdd_0_0 was not compressed") store.stop() store = null System.setProperty("spark.rdd.compress", "false") - store = new BlockManager(actorSystem, master, serializer, 2000) + store = new BlockManager("exec6", actorSystem, master, serializer, 2000) store.putSingle("rdd_0_0", new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER) assert(store.memoryStore.getSize("rdd_0_0") >= 1000, "rdd_0_0 was 
compressed") store.stop() store = null // Check that any other block types are also kept uncompressed - store = new BlockManager(actorSystem, master, serializer, 2000) + store = new BlockManager("exec7", actorSystem, master, serializer, 2000) store.putSingle("other_block", new Array[Byte](1000), StorageLevel.MEMORY_ONLY) assert(store.memoryStore.getSize("other_block") >= 1000, "other_block was compressed") store.stop() diff --git a/sbt/sbt b/sbt/sbt index a3055c13c1..8f426d18e8 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -5,4 +5,4 @@ if [ "$MESOS_HOME" != "" ]; then fi export SPARK_HOME=$(cd "$(dirname $0)/.."; pwd) export SPARK_TESTING=1 # To put test classes on classpath -java -Xmx1200M -XX:MaxPermSize=200m $EXTRA_ARGS -jar $SPARK_HOME/sbt/sbt-launch-*.jar "$@" +java -Xmx1200M -XX:MaxPermSize=250m $EXTRA_ARGS -jar $SPARK_HOME/sbt/sbt-launch-*.jar "$@" From 909850729ec59b788645575fdc03df7cc51fe42b Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 27 Jan 2013 23:17:20 -0800 Subject: [PATCH 211/291] Rename more things from slave to executor --- .../spark/deploy/worker/ExecutorRunner.scala | 2 +- .../executor/StandaloneExecutorBackend.scala | 12 ++--- .../scheduler/cluster/SlaveResources.scala | 4 -- .../cluster/SparkDeploySchedulerBackend.scala | 16 ++----- .../cluster/StandaloneClusterMessage.scala | 16 ++++--- .../cluster/StandaloneSchedulerBackend.scala | 48 +++++++++---------- .../scala/spark/storage/BlockManagerUI.scala | 2 + .../scala/spark/util/MetadataCleaner.scala | 10 ++-- 8 files changed, 50 insertions(+), 60 deletions(-) delete mode 100644 core/src/main/scala/spark/scheduler/cluster/SlaveResources.scala diff --git a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala index af3acfecb6..f5ff267d44 100644 --- a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala @@ -65,7 +65,7 @@ private[spark] class ExecutorRunner( } } - /** Replace variables such as {{SLAVEID}} and {{CORES}} in a command argument passed to us */ + /** Replace variables such as {{EXECUTOR_ID}} and {{CORES}} in a command argument passed to us */ def substituteVariables(argument: String): String = argument match { case "{{EXECUTOR_ID}}" => execId.toString case "{{HOSTNAME}}" => hostname diff --git a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala index 435ee5743e..50871802ea 100644 --- a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala +++ b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala @@ -8,10 +8,10 @@ import akka.actor.{ActorRef, Actor, Props} import java.util.concurrent.{TimeUnit, ThreadPoolExecutor, SynchronousQueue} import akka.remote.RemoteClientLifeCycleEvent import spark.scheduler.cluster._ -import spark.scheduler.cluster.RegisteredSlave +import spark.scheduler.cluster.RegisteredExecutor import spark.scheduler.cluster.LaunchTask -import spark.scheduler.cluster.RegisterSlaveFailed -import spark.scheduler.cluster.RegisterSlave +import spark.scheduler.cluster.RegisterExecutorFailed +import spark.scheduler.cluster.RegisterExecutor private[spark] class StandaloneExecutorBackend( @@ -30,7 +30,7 @@ private[spark] class StandaloneExecutorBackend( try { logInfo("Connecting to master: " + masterUrl) master = context.actorFor(masterUrl) - master ! RegisterSlave(executorId, hostname, cores) + master ! 
RegisterExecutor(executorId, hostname, cores) context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) context.watch(master) // Doesn't work with remote actors, but useful for testing } catch { @@ -41,11 +41,11 @@ private[spark] class StandaloneExecutorBackend( } override def receive = { - case RegisteredSlave(sparkProperties) => + case RegisteredExecutor(sparkProperties) => logInfo("Successfully registered with master") executor.initialize(executorId, hostname, sparkProperties) - case RegisterSlaveFailed(message) => + case RegisterExecutorFailed(message) => logError("Slave registration failed: " + message) System.exit(1) diff --git a/core/src/main/scala/spark/scheduler/cluster/SlaveResources.scala b/core/src/main/scala/spark/scheduler/cluster/SlaveResources.scala deleted file mode 100644 index 96ebaa4601..0000000000 --- a/core/src/main/scala/spark/scheduler/cluster/SlaveResources.scala +++ /dev/null @@ -1,4 +0,0 @@ -package spark.scheduler.cluster - -private[spark] -class SlaveResources(val slaveId: String, val hostname: String, val coresFree: Int) {} diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index f0792c1b76..6dd3ae003d 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -19,7 +19,6 @@ private[spark] class SparkDeploySchedulerBackend( var shutdownCallback : (SparkDeploySchedulerBackend) => Unit = _ val maxCores = System.getProperty("spark.cores.max", Int.MaxValue.toString).toInt - val executorIdToSlaveId = new HashMap[String, String] // Memory used by each executor (in megabytes) val executorMemory = { @@ -47,7 +46,7 @@ private[spark] class SparkDeploySchedulerBackend( } override def stop() { - stopping = true; + stopping = true super.stop() client.stop() if (shutdownCallback != null) { @@ -67,23 +66,16 @@ private[spark] class SparkDeploySchedulerBackend( } def executorAdded(id: String, workerId: String, host: String, cores: Int, memory: Int) { - executorIdToSlaveId += id -> workerId logInfo("Granted executor ID %s on host %s with %d cores, %s RAM".format( id, host, cores, Utils.memoryMegabytesToString(memory))) } - def executorRemoved(id: String, message: String, exitStatus: Option[Int]) { + def executorRemoved(executorId: String, message: String, exitStatus: Option[Int]) { val reason: ExecutorLossReason = exitStatus match { case Some(code) => ExecutorExited(code) case None => SlaveLost(message) } - logInfo("Executor %s removed: %s".format(id, message)) - executorIdToSlaveId.get(id) match { - case Some(slaveId) => - executorIdToSlaveId.remove(id) - scheduler.executorLost(slaveId, reason) - case None => - logInfo("No slave ID known for executor %s".format(id)) - } + logInfo("Executor %s removed: %s".format(executorId, message)) + scheduler.executorLost(executorId, reason) } } diff --git a/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala b/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala index 1386cd9d44..c68f15bdfa 100644 --- a/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala +++ b/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala @@ -11,24 +11,26 @@ private[spark] case class LaunchTask(task: TaskDescription) extends StandaloneClusterMessage private[spark] -case class RegisteredSlave(sparkProperties: Seq[(String, 
String)]) extends StandaloneClusterMessage +case class RegisteredExecutor(sparkProperties: Seq[(String, String)]) + extends StandaloneClusterMessage private[spark] -case class RegisterSlaveFailed(message: String) extends StandaloneClusterMessage +case class RegisterExecutorFailed(message: String) extends StandaloneClusterMessage -// Slaves to master +// Executors to master private[spark] -case class RegisterSlave(slaveId: String, host: String, cores: Int) extends StandaloneClusterMessage +case class RegisterExecutor(executorId: String, host: String, cores: Int) + extends StandaloneClusterMessage private[spark] -case class StatusUpdate(slaveId: String, taskId: Long, state: TaskState, data: SerializableBuffer) +case class StatusUpdate(executorId: String, taskId: Long, state: TaskState, data: SerializableBuffer) extends StandaloneClusterMessage private[spark] object StatusUpdate { /** Alternate factory method that takes a ByteBuffer directly for the data field */ - def apply(slaveId: String, taskId: Long, state: TaskState, data: ByteBuffer): StatusUpdate = { - StatusUpdate(slaveId, taskId, state, new SerializableBuffer(data)) + def apply(executorId: String, taskId: Long, state: TaskState, data: ByteBuffer): StatusUpdate = { + StatusUpdate(executorId, taskId, state, new SerializableBuffer(data)) } } diff --git a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index 32be1e7a26..69822f568c 100644 --- a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -24,9 +24,9 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor var totalCoreCount = new AtomicInteger(0) class MasterActor(sparkProperties: Seq[(String, String)]) extends Actor { - val slaveActor = new HashMap[String, ActorRef] - val slaveAddress = new HashMap[String, Address] - val slaveHost = new HashMap[String, String] + val executorActor = new HashMap[String, ActorRef] + val executorAddress = new HashMap[String, Address] + val executorHost = new HashMap[String, String] val freeCores = new HashMap[String, Int] val actorToExecutorId = new HashMap[ActorRef, String] val addressToExecutorId = new HashMap[Address, String] @@ -37,17 +37,17 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor } def receive = { - case RegisterSlave(executorId, host, cores) => - if (slaveActor.contains(executorId)) { - sender ! RegisterSlaveFailed("Duplicate executor ID: " + executorId) + case RegisterExecutor(executorId, host, cores) => + if (executorActor.contains(executorId)) { + sender ! RegisterExecutorFailed("Duplicate executor ID: " + executorId) } else { logInfo("Registered executor: " + sender + " with ID " + executorId) - sender ! RegisteredSlave(sparkProperties) + sender ! 
RegisteredExecutor(sparkProperties) context.watch(sender) - slaveActor(executorId) = sender - slaveHost(executorId) = host + executorActor(executorId) = sender + executorHost(executorId) = host freeCores(executorId) = cores - slaveAddress(executorId) = sender.path.address + executorAddress(executorId) = sender.path.address actorToExecutorId(sender) = executorId addressToExecutorId(sender.path.address) = executorId totalCoreCount.addAndGet(cores) @@ -69,45 +69,45 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor context.stop(self) case Terminated(actor) => - actorToExecutorId.get(actor).foreach(removeSlave(_, "Akka actor terminated")) + actorToExecutorId.get(actor).foreach(removeExecutor(_, "Akka actor terminated")) case RemoteClientDisconnected(transport, address) => - addressToExecutorId.get(address).foreach(removeSlave(_, "remote Akka client disconnected")) + addressToExecutorId.get(address).foreach(removeExecutor(_, "remote Akka client disconnected")) case RemoteClientShutdown(transport, address) => - addressToExecutorId.get(address).foreach(removeSlave(_, "remote Akka client shutdown")) + addressToExecutorId.get(address).foreach(removeExecutor(_, "remote Akka client shutdown")) } - // Make fake resource offers on all slaves + // Make fake resource offers on all executors def makeOffers() { launchTasks(scheduler.resourceOffers( - slaveHost.toArray.map {case (id, host) => new WorkerOffer(id, host, freeCores(id))})) + executorHost.toArray.map {case (id, host) => new WorkerOffer(id, host, freeCores(id))})) } - // Make fake resource offers on just one slave + // Make fake resource offers on just one executor def makeOffers(executorId: String) { launchTasks(scheduler.resourceOffers( - Seq(new WorkerOffer(executorId, slaveHost(executorId), freeCores(executorId))))) + Seq(new WorkerOffer(executorId, executorHost(executorId), freeCores(executorId))))) } // Launch tasks returned by a set of resource offers def launchTasks(tasks: Seq[Seq[TaskDescription]]) { for (task <- tasks.flatten) { freeCores(task.executorId) -= 1 - slaveActor(task.executorId) ! LaunchTask(task) + executorActor(task.executorId) ! LaunchTask(task) } } // Remove a disconnected slave from the cluster - def removeSlave(executorId: String, reason: String) { + def removeExecutor(executorId: String, reason: String) { logInfo("Slave " + executorId + " disconnected, so removing it") val numCores = freeCores(executorId) - actorToExecutorId -= slaveActor(executorId) - addressToExecutorId -= slaveAddress(executorId) - slaveActor -= executorId - slaveHost -= executorId + actorToExecutorId -= executorActor(executorId) + addressToExecutorId -= executorAddress(executorId) + executorActor -= executorId + executorHost -= executorId freeCores -= executorId - slaveHost -= executorId + executorHost -= executorId totalCoreCount.addAndGet(-numCores) scheduler.executorLost(executorId, SlaveLost(reason)) } diff --git a/core/src/main/scala/spark/storage/BlockManagerUI.scala b/core/src/main/scala/spark/storage/BlockManagerUI.scala index b7423c7234..956ede201e 100644 --- a/core/src/main/scala/spark/storage/BlockManagerUI.scala +++ b/core/src/main/scala/spark/storage/BlockManagerUI.scala @@ -21,6 +21,8 @@ object BlockManagerUI extends Logging { def start(actorSystem : ActorSystem, masterActor: ActorRef, sc: SparkContext) { val webUIDirectives = new BlockManagerUIDirectives(actorSystem, masterActor, sc) try { + // TODO: This needs to find a random free port to bind to. 
Unfortunately, there's no way + // in spray to do that, so we'll have to rely on something like new ServerSocket() val boundPort = AkkaUtils.startSprayServer(actorSystem, "0.0.0.0", Option(System.getenv("BLOCKMANAGER_UI_PORT")).getOrElse("9080").toInt, webUIDirectives.handler, "BlockManagerHTTPServer") diff --git a/core/src/main/scala/spark/util/MetadataCleaner.scala b/core/src/main/scala/spark/util/MetadataCleaner.scala index 139e21d09e..721c4c6029 100644 --- a/core/src/main/scala/spark/util/MetadataCleaner.scala +++ b/core/src/main/scala/spark/util/MetadataCleaner.scala @@ -14,18 +14,16 @@ class MetadataCleaner(name: String, cleanupFunc: (Long) => Unit) extends Logging val task = new TimerTask { def run() { try { - if (delaySeconds > 0) { - cleanupFunc(System.currentTimeMillis() - (delaySeconds * 1000)) - logInfo("Ran metadata cleaner for " + name) - } + cleanupFunc(System.currentTimeMillis() - (delaySeconds * 1000)) + logInfo("Ran metadata cleaner for " + name) } catch { case e: Exception => logError("Error running cleanup task for " + name, e) } } } - if (periodSeconds > 0) { - logInfo( + if (delaySeconds > 0) { + logDebug( "Starting metadata cleaner for " + name + " with delay of " + delaySeconds + " seconds and " + "period of " + periodSeconds + " secs") timer.schedule(task, periodSeconds * 1000, periodSeconds * 1000) From f03d9760fd8ac67fd0865cb355ba75d2eff507fe Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 27 Jan 2013 23:56:14 -0800 Subject: [PATCH 212/291] Clean up BlockManagerUI a little (make it not be an object, merge with Directives, and bind to a random port) --- core/src/main/scala/spark/SparkContext.scala | 7 +- core/src/main/scala/spark/Utils.scala | 17 ++- .../spark/deploy/master/MasterWebUI.scala | 6 +- .../spark/deploy/worker/WorkerWebUI.scala | 6 +- .../scala/spark/storage/BlockManagerUI.scala | 120 +++++++++--------- .../src/main/scala/spark/util/AkkaUtils.scala | 6 +- .../scala/spark/util/MetadataCleaner.scala | 3 + 7 files changed, 91 insertions(+), 74 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 39721b47ae..77036c1275 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -44,6 +44,7 @@ import scheduler.{ResultTask, ShuffleMapTask, DAGScheduler, TaskScheduler} import spark.scheduler.local.LocalScheduler import spark.scheduler.cluster.{SparkDeploySchedulerBackend, SchedulerBackend, ClusterScheduler} import spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend} +import storage.BlockManagerUI import util.{MetadataCleaner, TimeStampedHashMap} /** @@ -88,8 +89,9 @@ class SparkContext( SparkEnv.set(env) // Start the BlockManager UI - spark.storage.BlockManagerUI.start(SparkEnv.get.actorSystem, - SparkEnv.get.blockManager.master.masterActor, this) + private[spark] val ui = new BlockManagerUI( + env.actorSystem, env.blockManager.master.masterActor, this) + ui.start() // Used to store a URL for each static file/jar together with the file's local timestamp private[spark] val addedFiles = HashMap[String, Long]() @@ -97,7 +99,6 @@ class SparkContext( // Keeps track of all persisted RDDs private[spark] val persistentRdds = new TimeStampedHashMap[Int, RDD[_]]() - private[spark] val metadataCleaner = new MetadataCleaner("SparkContext", this.cleanup) diff --git a/core/src/main/scala/spark/Utils.scala b/core/src/main/scala/spark/Utils.scala index ae77264372..1e58d01273 100644 --- a/core/src/main/scala/spark/Utils.scala 
+++ b/core/src/main/scala/spark/Utils.scala @@ -1,7 +1,7 @@ package spark import java.io._ -import java.net.{NetworkInterface, InetAddress, Inet4Address, URL, URI} +import java.net._ import java.util.{Locale, Random, UUID} import java.util.concurrent.{Executors, ThreadFactory, ThreadPoolExecutor} import org.apache.hadoop.conf.Configuration @@ -11,6 +11,7 @@ import scala.collection.JavaConversions._ import scala.io.Source import com.google.common.io.Files import com.google.common.util.concurrent.ThreadFactoryBuilder +import scala.Some /** * Various utility methods used by Spark. @@ -431,4 +432,18 @@ private object Utils extends Logging { } "%s at %s:%s".format(lastSparkMethod, firstUserFile, firstUserLine) } + + /** + * Try to find a free port to bind to on the local host. This should ideally never be needed, + * except that, unfortunately, some of the networking libraries we currently rely on (e.g. Spray) + * don't let users bind to port 0 and then figure out which free port they actually bound to. + * We work around this by binding a ServerSocket and immediately unbinding it. This is *not* + * necessarily guaranteed to work, but it's the best we can do. + */ + def findFreePort(): Int = { + val socket = new ServerSocket(0) + val portBound = socket.getLocalPort + socket.close() + portBound + } } diff --git a/core/src/main/scala/spark/deploy/master/MasterWebUI.scala b/core/src/main/scala/spark/deploy/master/MasterWebUI.scala index 458ee2d665..a01774f511 100644 --- a/core/src/main/scala/spark/deploy/master/MasterWebUI.scala +++ b/core/src/main/scala/spark/deploy/master/MasterWebUI.scala @@ -14,12 +14,15 @@ import cc.spray.typeconversion.SprayJsonSupport._ import spark.deploy._ import spark.deploy.JsonProtocol._ +/** + * Web UI server for the standalone master. + */ private[spark] class MasterWebUI(val actorSystem: ActorSystem, master: ActorRef) extends Directives { val RESOURCE_DIR = "spark/deploy/master/webui" val STATIC_RESOURCE_DIR = "spark/deploy/static" - implicit val timeout = Timeout(1 seconds) + implicit val timeout = Timeout(10 seconds) val handler = { get { @@ -76,5 +79,4 @@ class MasterWebUI(val actorSystem: ActorSystem, master: ActorRef) extends Direct getFromResourceDirectory(RESOURCE_DIR) } } - } diff --git a/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala b/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala index f9489d99fc..ef81f072a3 100644 --- a/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala +++ b/core/src/main/scala/spark/deploy/worker/WorkerWebUI.scala @@ -13,12 +13,15 @@ import cc.spray.typeconversion.SprayJsonSupport._ import spark.deploy.{WorkerState, RequestWorkerState} import spark.deploy.JsonProtocol._ +/** + * Web UI server for the standalone worker. 
+ */ private[spark] class WorkerWebUI(val actorSystem: ActorSystem, worker: ActorRef) extends Directives { val RESOURCE_DIR = "spark/deploy/worker/webui" val STATIC_RESOURCE_DIR = "spark/deploy/static" - implicit val timeout = Timeout(1 seconds) + implicit val timeout = Timeout(10 seconds) val handler = { get { @@ -50,5 +53,4 @@ class WorkerWebUI(val actorSystem: ActorSystem, worker: ActorRef) extends Direct getFromResourceDirectory(RESOURCE_DIR) } } - } diff --git a/core/src/main/scala/spark/storage/BlockManagerUI.scala b/core/src/main/scala/spark/storage/BlockManagerUI.scala index 956ede201e..eda320fa47 100644 --- a/core/src/main/scala/spark/storage/BlockManagerUI.scala +++ b/core/src/main/scala/spark/storage/BlockManagerUI.scala @@ -1,32 +1,41 @@ package spark.storage import akka.actor.{ActorRef, ActorSystem} -import akka.dispatch.Await import akka.pattern.ask import akka.util.Timeout import akka.util.duration._ -import cc.spray.Directives import cc.spray.directives._ import cc.spray.typeconversion.TwirlSupport._ +import cc.spray.Directives import scala.collection.mutable.ArrayBuffer -import spark.{Logging, SparkContext, SparkEnv} +import spark.{Logging, SparkContext} import spark.util.AkkaUtils import spark.Utils +/** + * Web UI server for the BlockManager inside each SparkContext. + */ private[spark] -object BlockManagerUI extends Logging { +class BlockManagerUI(val actorSystem: ActorSystem, blockManagerMaster: ActorRef, sc: SparkContext) + extends Directives with Logging { - /* Starts the Web interface for the BlockManager */ - def start(actorSystem : ActorSystem, masterActor: ActorRef, sc: SparkContext) { - val webUIDirectives = new BlockManagerUIDirectives(actorSystem, masterActor, sc) + val STATIC_RESOURCE_DIR = "spark/deploy/static" + + implicit val timeout = Timeout(10 seconds) + + /** Start a HTTP server to run the Web interface */ + def start() { try { - // TODO: This needs to find a random free port to bind to. Unfortunately, there's no way - // in spray to do that, so we'll have to rely on something like new ServerSocket() - val boundPort = AkkaUtils.startSprayServer(actorSystem, "0.0.0.0", - Option(System.getenv("BLOCKMANAGER_UI_PORT")).getOrElse("9080").toInt, - webUIDirectives.handler, "BlockManagerHTTPServer") - logInfo("Started BlockManager web UI at %s:%d".format(Utils.localHostName(), boundPort)) + val port = if (System.getProperty("spark.ui.port") != null) { + System.getProperty("spark.ui.port").toInt + } else { + // TODO: Unfortunately, it's not possible to pass port 0 to spray and figure out which + // random port it bound to, so we have to try to find a local one by creating a socket. + Utils.findFreePort() + } + AkkaUtils.startSprayServer(actorSystem, "0.0.0.0", port, handler, "BlockManagerHTTPServer") + logInfo("Started BlockManager web UI at http://%s:%d".format(Utils.localHostName(), port)) } catch { case e: Exception => logError("Failed to create BlockManager WebUI", e) @@ -34,58 +43,43 @@ object BlockManagerUI extends Logging { } } -} - - -private[spark] -class BlockManagerUIDirectives(val actorSystem: ActorSystem, master: ActorRef, - sc: SparkContext) extends Directives { - - val STATIC_RESOURCE_DIR = "spark/deploy/static" - implicit val timeout = Timeout(1 seconds) - val handler = { - - get { path("") { completeWith { - // Request the current storage status from the Master - val future = master ? 
GetStorageStatus - future.map { status => - val storageStatusList = status.asInstanceOf[ArrayBuffer[StorageStatus]].toArray - - // Calculate macro-level statistics - val maxMem = storageStatusList.map(_.maxMem).reduce(_+_) - val remainingMem = storageStatusList.map(_.memRemaining).reduce(_+_) - val diskSpaceUsed = storageStatusList.flatMap(_.blocks.values.map(_.diskSize)) - .reduceOption(_+_).getOrElse(0L) - - val rdds = StorageUtils.rddInfoFromStorageStatus(storageStatusList, sc) - - spark.storage.html.index. - render(maxMem, remainingMem, diskSpaceUsed, rdds, storageStatusList) + get { + path("") { + completeWith { + // Request the current storage status from the Master + val future = blockManagerMaster ? GetStorageStatus + future.map { status => + // Calculate macro-level statistics + val storageStatusList = status.asInstanceOf[ArrayBuffer[StorageStatus]].toArray + val maxMem = storageStatusList.map(_.maxMem).reduce(_+_) + val remainingMem = storageStatusList.map(_.memRemaining).reduce(_+_) + val diskSpaceUsed = storageStatusList.flatMap(_.blocks.values.map(_.diskSize)) + .reduceOption(_+_).getOrElse(0L) + val rdds = StorageUtils.rddInfoFromStorageStatus(storageStatusList, sc) + spark.storage.html.index. + render(maxMem, remainingMem, diskSpaceUsed, rdds, storageStatusList) + } + } + } ~ + path("rdd") { + parameter("id") { id => + completeWith { + val future = blockManagerMaster ? GetStorageStatus + future.map { status => + val prefix = "rdd_" + id.toString + val storageStatusList = status.asInstanceOf[ArrayBuffer[StorageStatus]].toArray + val filteredStorageStatusList = StorageUtils. + filterStorageStatusByPrefix(storageStatusList, prefix) + val rddInfo = StorageUtils.rddInfoFromStorageStatus(filteredStorageStatusList, sc).head + spark.storage.html.rdd.render(rddInfo, filteredStorageStatusList) + } + } + } + } ~ + pathPrefix("static") { + getFromResourceDirectory(STATIC_RESOURCE_DIR) } - }}} ~ - get { path("rdd") { parameter("id") { id => { completeWith { - val future = master ? GetStorageStatus - future.map { status => - val prefix = "rdd_" + id.toString - - - val storageStatusList = status.asInstanceOf[ArrayBuffer[StorageStatus]].toArray - val filteredStorageStatusList = StorageUtils. - filterStorageStatusByPrefix(storageStatusList, prefix) - - val rddInfo = StorageUtils.rddInfoFromStorageStatus(filteredStorageStatusList, sc).head - - spark.storage.html.rdd.render(rddInfo, filteredStorageStatusList) - - } - }}}}} ~ - pathPrefix("static") { - getFromResourceDirectory(STATIC_RESOURCE_DIR) } - } - - - } diff --git a/core/src/main/scala/spark/util/AkkaUtils.scala b/core/src/main/scala/spark/util/AkkaUtils.scala index 775ff8f1aa..e0fdeffbc4 100644 --- a/core/src/main/scala/spark/util/AkkaUtils.scala +++ b/core/src/main/scala/spark/util/AkkaUtils.scala @@ -1,6 +1,6 @@ package spark.util -import akka.actor.{Props, ActorSystemImpl, ActorSystem} +import akka.actor.{ActorRef, Props, ActorSystemImpl, ActorSystem} import com.typesafe.config.ConfigFactory import akka.util.duration._ import akka.pattern.ask @@ -55,7 +55,7 @@ private[spark] object AkkaUtils { * handle requests. Returns the bound port or throws a SparkException on failure. 
*/ def startSprayServer(actorSystem: ActorSystem, ip: String, port: Int, route: Route, - name: String = "HttpServer"): Int = { + name: String = "HttpServer"): ActorRef = { val ioWorker = new IoWorker(actorSystem).start() val httpService = actorSystem.actorOf(Props(new HttpService(route))) val rootService = actorSystem.actorOf(Props(new SprayCanRootService(httpService))) @@ -67,7 +67,7 @@ private[spark] object AkkaUtils { try { Await.result(future, timeout) match { case bound: HttpServer.Bound => - return bound.endpoint.getPort + return server case other: Any => throw new SparkException("Failed to bind web UI to port " + port + ": " + other) } diff --git a/core/src/main/scala/spark/util/MetadataCleaner.scala b/core/src/main/scala/spark/util/MetadataCleaner.scala index 721c4c6029..51fb440108 100644 --- a/core/src/main/scala/spark/util/MetadataCleaner.scala +++ b/core/src/main/scala/spark/util/MetadataCleaner.scala @@ -5,6 +5,9 @@ import java.util.{TimerTask, Timer} import spark.Logging +/** + * Runs a timer task to periodically clean up metadata (e.g. old files or hashtable entries) + */ class MetadataCleaner(name: String, cleanupFunc: (Long) => Unit) extends Logging { val delaySeconds = MetadataCleaner.getDelaySeconds From 286f8f876ff495df33a7966e77ca90d69f338450 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 28 Jan 2013 01:29:27 -0800 Subject: [PATCH 213/291] Change time unit in MetadataCleaner to seconds --- core/src/main/scala/spark/util/MetadataCleaner.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/spark/util/MetadataCleaner.scala b/core/src/main/scala/spark/util/MetadataCleaner.scala index 51fb440108..6cf93a9b17 100644 --- a/core/src/main/scala/spark/util/MetadataCleaner.scala +++ b/core/src/main/scala/spark/util/MetadataCleaner.scala @@ -9,7 +9,6 @@ import spark.Logging * Runs a timer task to periodically clean up metadata (e.g. 
old files or hashtable entries) */ class MetadataCleaner(name: String, cleanupFunc: (Long) => Unit) extends Logging { - val delaySeconds = MetadataCleaner.getDelaySeconds val periodSeconds = math.max(10, delaySeconds / 10) val timer = new Timer(name + " cleanup timer", true) @@ -39,7 +38,7 @@ class MetadataCleaner(name: String, cleanupFunc: (Long) => Unit) extends Logging object MetadataCleaner { - def getDelaySeconds = (System.getProperty("spark.cleaner.delay", "-100").toDouble * 60).toInt - def setDelaySeconds(delay: Long) { System.setProperty("spark.cleaner.delay", delay.toString) } + def getDelaySeconds = System.getProperty("spark.cleaner.delay", "-1").toInt + def setDelaySeconds(delay: Int) { System.setProperty("spark.cleaner.delay", delay.toString) } } From 07f568e1bfc67eead88e2c5dbfb9cac23e1ac8bc Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 24 Jan 2013 15:27:29 -0800 Subject: [PATCH 214/291] SPARK-658: Adding logging of stage duration --- .../scala/spark/scheduler/DAGScheduler.scala | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index bd541d4207..8aad667182 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -86,6 +86,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with val activeJobs = new HashSet[ActiveJob] val resultStageToJob = new HashMap[Stage, ActiveJob] + val stageSubmissionTimes = new HashMap[Stage, Long] val metadataCleaner = new MetadataCleaner("DAGScheduler", this.cleanup) @@ -393,6 +394,9 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with logDebug("New pending tasks: " + myPending) taskSched.submitTasks( new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.priority)) + if (!stageSubmissionTimes.contains(stage)) { + stageSubmissionTimes.put(stage, System.currentTimeMillis()) + } } else { logDebug("Stage " + stage + " is actually done; %b %d %d".format( stage.isAvailable, stage.numAvailableOutputs, stage.numPartitions)) @@ -407,6 +411,15 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with def handleTaskCompletion(event: CompletionEvent) { val task = event.task val stage = idToStage(task.stageId) + + def stageFinished(stage: Stage) = { + val serviceTime = stageSubmissionTimes.remove(stage) match { + case Some(t) => (System.currentTimeMillis() - t).toString + case _ => "Unkown" + } + logInfo("%s (%s) finished in %s ms".format(stage, stage.origin, serviceTime)) + running -= stage + } event.reason match { case Success => logInfo("Completed " + task) @@ -421,13 +434,13 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with if (!job.finished(rt.outputId)) { job.finished(rt.outputId) = true job.numFinished += 1 - job.listener.taskSucceeded(rt.outputId, event.result) // If the whole job has finished, remove it if (job.numFinished == job.numPartitions) { activeJobs -= job resultStageToJob -= stage - running -= stage + stageFinished(stage) } + job.listener.taskSucceeded(rt.outputId, event.result) } case None => logInfo("Ignoring result from " + rt + " because its job has finished") @@ -444,8 +457,8 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with stage.addOutputLoc(smt.partition, status) } if (running.contains(stage) && pendingTasks(stage).isEmpty) { - logInfo(stage + " (" + 
stage.origin + ") finished; looking for newly runnable stages") - running -= stage + stageFinished(stage) + logInfo("looking for newly runnable stages") logInfo("running: " + running) logInfo("waiting: " + waiting) logInfo("failed: " + failed) From c423be7d8e1349fc00431328b76b52f4eee8a975 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 24 Jan 2013 18:25:57 -0800 Subject: [PATCH 215/291] Renaming stage finished function --- core/src/main/scala/spark/scheduler/DAGScheduler.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index 8aad667182..bce7418e87 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -412,7 +412,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with val task = event.task val stage = idToStage(task.stageId) - def stageFinished(stage: Stage) = { + def markStageAsFinished(stage: Stage) = { val serviceTime = stageSubmissionTimes.remove(stage) match { case Some(t) => (System.currentTimeMillis() - t).toString case _ => "Unkown" @@ -438,7 +438,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with if (job.numFinished == job.numPartitions) { activeJobs -= job resultStageToJob -= stage - stageFinished(stage) + markStageAsFinished(stage) } job.listener.taskSucceeded(rt.outputId, event.result) } @@ -457,7 +457,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with stage.addOutputLoc(smt.partition, status) } if (running.contains(stage) && pendingTasks(stage).isEmpty) { - stageFinished(stage) + markStageAsFinished(stage) logInfo("looking for newly runnable stages") logInfo("running: " + running) logInfo("waiting: " + waiting) From 501433f1d59b1b326c0a7169fa1fd6136f7628e3 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 28 Jan 2013 10:17:35 -0800 Subject: [PATCH 216/291] Making submission time a field --- core/src/main/scala/spark/scheduler/DAGScheduler.scala | 7 +++---- core/src/main/scala/spark/scheduler/Stage.scala | 3 +++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index bce7418e87..7ba1f3430a 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -86,7 +86,6 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with val activeJobs = new HashSet[ActiveJob] val resultStageToJob = new HashMap[Stage, ActiveJob] - val stageSubmissionTimes = new HashMap[Stage, Long] val metadataCleaner = new MetadataCleaner("DAGScheduler", this.cleanup) @@ -394,8 +393,8 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with logDebug("New pending tasks: " + myPending) taskSched.submitTasks( new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.priority)) - if (!stageSubmissionTimes.contains(stage)) { - stageSubmissionTimes.put(stage, System.currentTimeMillis()) + if (!stage.submissionTime.isDefined) { + stage.submissionTime = Some(System.currentTimeMillis()) } } else { logDebug("Stage " + stage + " is actually done; %b %d %d".format( @@ -413,7 +412,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with val stage = idToStage(task.stageId) def markStageAsFinished(stage: Stage) = { - val 
serviceTime = stageSubmissionTimes.remove(stage) match { + val serviceTime = stage.submissionTime match { case Some(t) => (System.currentTimeMillis() - t).toString case _ => "Unkown" } diff --git a/core/src/main/scala/spark/scheduler/Stage.scala b/core/src/main/scala/spark/scheduler/Stage.scala index e9419728e3..374114d870 100644 --- a/core/src/main/scala/spark/scheduler/Stage.scala +++ b/core/src/main/scala/spark/scheduler/Stage.scala @@ -32,6 +32,9 @@ private[spark] class Stage( val outputLocs = Array.fill[List[MapStatus]](numPartitions)(Nil) var numAvailableOutputs = 0 + /** When first task was submitted to scheduler. */ + var submissionTime: Option[Long] = None + private var nextAttemptId = 0 def isAvailable: Boolean = { From a423ee546c389b5ce0d2117299456712370d7ad1 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Tue, 22 Jan 2013 18:48:43 -0800 Subject: [PATCH 217/291] expose RDD & storage info directly via SparkContext --- core/src/main/scala/spark/SparkContext.scala | 16 ++++++++ .../spark/storage/BlockManagerMaster.scala | 4 ++ .../scala/spark/storage/BlockManagerUI.scala | 39 +++++++------------ .../scala/spark/storage/StorageUtils.scala | 10 +++-- 4 files changed, 41 insertions(+), 28 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 77036c1275..be992250a9 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -46,6 +46,7 @@ import spark.scheduler.cluster.{SparkDeploySchedulerBackend, SchedulerBackend, C import spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend} import storage.BlockManagerUI import util.{MetadataCleaner, TimeStampedHashMap} +import storage.{StorageStatus, StorageUtils, RDDInfo} /** * Main entry point for Spark functionality. A SparkContext represents the connection to a Spark @@ -473,6 +474,21 @@ class SparkContext( } } + /** + * Return information about what RDDs are cached, if they are in mem or on disk, how much space + * they take, etc. + */ + def getRDDStorageInfo : Array[RDDInfo] = { + StorageUtils.rddInfoFromStorageStatus(getSlavesStorageStatus, this) + } + + /** + * Return information about blocks stored in all of the slaves + */ + def getSlavesStorageStatus : Array[StorageStatus] = { + env.blockManager.master.getStorageStatus + } + /** * Clear the job's list of files added by `addFile` so that they do not get downloaded to * any new nodes. 
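The SparkContext additions above expose cache and storage information directly to driver programs. A minimal, illustrative sketch of querying them (the object name and the cached RDD below are hypothetical; only getRDDStorageInfo and getSlavesStorageStatus come from this patch):

    import spark.SparkContext

    object StorageInfoExample {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "StorageInfoExample")  // hypothetical app name
        val data = sc.parallelize(1 to 1000, 2).cache()
        data.count()  // materialize the RDD so its blocks are registered with the master

        // Per-RDD view: id, name, storage level, partitions, memory/disk size
        sc.getRDDStorageInfo.foreach(info => println(info))

        // Per-slave view: block manager id and remaining memory on each slave
        sc.getSlavesStorageStatus.foreach { status =>
          println(status.blockManagerId + ": " + status.memRemaining + " bytes free")
        }
        sc.stop()
      }
    }

Both accessors go through the BlockManagerMaster (see the getStorageStatus addition below), so they reflect whatever storage state the master has most recently collected from the slaves.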
diff --git a/core/src/main/scala/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/spark/storage/BlockManagerMaster.scala index 55ff1dde9c..c7ee76f0b7 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMaster.scala @@ -118,6 +118,10 @@ private[spark] class BlockManagerMaster( askMasterWithRetry[Map[BlockManagerId, (Long, Long)]](GetMemoryStatus) } + def getStorageStatus: Array[StorageStatus] = { + askMasterWithRetry[ArrayBuffer[StorageStatus]](GetStorageStatus).toArray + } + /** Stop the master actor, called only on the Spark master node */ def stop() { if (masterActor != null) { diff --git a/core/src/main/scala/spark/storage/BlockManagerUI.scala b/core/src/main/scala/spark/storage/BlockManagerUI.scala index eda320fa47..52f6d1b657 100644 --- a/core/src/main/scala/spark/storage/BlockManagerUI.scala +++ b/core/src/main/scala/spark/storage/BlockManagerUI.scala @@ -1,13 +1,10 @@ package spark.storage import akka.actor.{ActorRef, ActorSystem} -import akka.pattern.ask import akka.util.Timeout import akka.util.duration._ -import cc.spray.directives._ import cc.spray.typeconversion.TwirlSupport._ import cc.spray.Directives -import scala.collection.mutable.ArrayBuffer import spark.{Logging, SparkContext} import spark.util.AkkaUtils import spark.Utils @@ -48,32 +45,26 @@ class BlockManagerUI(val actorSystem: ActorSystem, blockManagerMaster: ActorRef, path("") { completeWith { // Request the current storage status from the Master - val future = blockManagerMaster ? GetStorageStatus - future.map { status => - // Calculate macro-level statistics - val storageStatusList = status.asInstanceOf[ArrayBuffer[StorageStatus]].toArray - val maxMem = storageStatusList.map(_.maxMem).reduce(_+_) - val remainingMem = storageStatusList.map(_.memRemaining).reduce(_+_) - val diskSpaceUsed = storageStatusList.flatMap(_.blocks.values.map(_.diskSize)) - .reduceOption(_+_).getOrElse(0L) - val rdds = StorageUtils.rddInfoFromStorageStatus(storageStatusList, sc) - spark.storage.html.index. - render(maxMem, remainingMem, diskSpaceUsed, rdds, storageStatusList) - } + val storageStatusList = sc.getSlavesStorageStatus + // Calculate macro-level statistics + val maxMem = storageStatusList.map(_.maxMem).reduce(_+_) + val remainingMem = storageStatusList.map(_.memRemaining).reduce(_+_) + val diskSpaceUsed = storageStatusList.flatMap(_.blocks.values.map(_.diskSize)) + .reduceOption(_+_).getOrElse(0L) + val rdds = StorageUtils.rddInfoFromStorageStatus(storageStatusList, sc) + spark.storage.html.index. + render(maxMem, remainingMem, diskSpaceUsed, rdds, storageStatusList) } } ~ path("rdd") { parameter("id") { id => completeWith { - val future = blockManagerMaster ? GetStorageStatus - future.map { status => - val prefix = "rdd_" + id.toString - val storageStatusList = status.asInstanceOf[ArrayBuffer[StorageStatus]].toArray - val filteredStorageStatusList = StorageUtils. - filterStorageStatusByPrefix(storageStatusList, prefix) - val rddInfo = StorageUtils.rddInfoFromStorageStatus(filteredStorageStatusList, sc).head - spark.storage.html.rdd.render(rddInfo, filteredStorageStatusList) - } + val prefix = "rdd_" + id.toString + val storageStatusList = sc.getSlavesStorageStatus + val filteredStorageStatusList = StorageUtils. 
+ filterStorageStatusByPrefix(storageStatusList, prefix) + val rddInfo = StorageUtils.rddInfoFromStorageStatus(filteredStorageStatusList, sc).head + spark.storage.html.rdd.render(rddInfo, filteredStorageStatusList) } } } ~ diff --git a/core/src/main/scala/spark/storage/StorageUtils.scala b/core/src/main/scala/spark/storage/StorageUtils.scala index a10e3a95c6..d6e33c8619 100644 --- a/core/src/main/scala/spark/storage/StorageUtils.scala +++ b/core/src/main/scala/spark/storage/StorageUtils.scala @@ -56,9 +56,11 @@ object StorageUtils { // Find the id of the RDD, e.g. rdd_1 => 1 val rddId = rddKey.split("_").last.toInt // Get the friendly name for the rdd, if available. - val rddName = Option(sc.persistentRdds(rddId).name).getOrElse(rddKey) - val rddStorageLevel = sc.persistentRdds(rddId).getStorageLevel - + val rdd = sc.persistentRdds(rddId) + val rddName = Option(rdd.name).getOrElse(rddKey) + val rddStorageLevel = rdd.getStorageLevel + //TODO get total number of partitions in rdd + RDDInfo(rddId, rddName, rddStorageLevel, rddBlocks.length, memSize, diskSize) }.toArray } @@ -75,4 +77,4 @@ object StorageUtils { } -} \ No newline at end of file +} From 0f22c4207f27bc8d1675af82f873141dda754f5c Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Mon, 28 Jan 2013 10:08:59 -0800 Subject: [PATCH 218/291] better formatting for RDDInfo --- core/src/main/scala/spark/storage/StorageUtils.scala | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/spark/storage/StorageUtils.scala b/core/src/main/scala/spark/storage/StorageUtils.scala index d6e33c8619..ce7c067eea 100644 --- a/core/src/main/scala/spark/storage/StorageUtils.scala +++ b/core/src/main/scala/spark/storage/StorageUtils.scala @@ -1,6 +1,6 @@ package spark.storage -import spark.SparkContext +import spark.{Utils, SparkContext} import BlockManagerMasterActor.BlockStatus private[spark] @@ -22,8 +22,14 @@ case class StorageStatus(blockManagerId: BlockManagerId, maxMem: Long, } case class RDDInfo(id: Int, name: String, storageLevel: StorageLevel, - numPartitions: Int, memSize: Long, diskSize: Long) - + numPartitions: Int, memSize: Long, diskSize: Long) { + override def toString = { + import Utils.memoryBytesToString + import java.lang.{Integer => JInt} + String.format("RDD \"%s\" (%d) Storage: %s; Partitions: %d; MemorySize: %s; DiskSize: %s", name, id.asInstanceOf[JInt], + storageLevel.toString, numPartitions.asInstanceOf[JInt], memoryBytesToString(memSize), memoryBytesToString(diskSize)) + } +} /* Helper methods for storage-related objects */ private[spark] From efff7bfb3382f4e07f9fad0e6e647c0ec629355e Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Mon, 28 Jan 2013 20:23:11 -0800 Subject: [PATCH 219/291] add long and float accumulatorparams --- core/src/main/scala/spark/SparkContext.scala | 10 ++++++++++ core/src/test/scala/spark/AccumulatorSuite.scala | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 77036c1275..dc9b8688b3 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -673,6 +673,16 @@ object SparkContext { def zero(initialValue: Int) = 0 } + implicit object LongAccumulatorParam extends AccumulatorParam[Long] { + def addInPlace(t1: Long, t2: Long) = t1 + t2 + def zero(initialValue: Long) = 0l + } + + implicit object FloatAccumulatorParam extends AccumulatorParam[Float] { + def addInPlace(t1: Float, t2: Float) = t1 + t2 + def 
zero(initialValue: Float) = 0f + } + // TODO: Add AccumulatorParams for other types, e.g. lists and strings implicit def rddToPairRDDFunctions[K: ClassManifest, V: ClassManifest](rdd: RDD[(K, V)]) = diff --git a/core/src/test/scala/spark/AccumulatorSuite.scala b/core/src/test/scala/spark/AccumulatorSuite.scala index 78d64a44ae..ac8ae7d308 100644 --- a/core/src/test/scala/spark/AccumulatorSuite.scala +++ b/core/src/test/scala/spark/AccumulatorSuite.scala @@ -17,6 +17,12 @@ class AccumulatorSuite extends FunSuite with ShouldMatchers with LocalSparkConte val d = sc.parallelize(1 to 20) d.foreach{x => acc += x} acc.value should be (210) + + + val longAcc = sc.accumulator(0l) + val maxInt = Integer.MAX_VALUE.toLong + d.foreach{x => longAcc += maxInt + x} + longAcc.value should be (210l + maxInt * 20) } test ("value not assignable from tasks") { From 1f9b486a8be49ef547ac1532cafd63c4c9d4ddda Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 28 Jan 2013 20:24:54 -0800 Subject: [PATCH 220/291] Some DEBUG-level log cleanup. A few changes to make the DEBUG-level logs less noisy and more readable. - Moved a few very frequent messages to Trace - Changed some BlockManger log messages to make them more understandable SPARK-666 #resolve --- .../main/scala/spark/scheduler/DAGScheduler.scala | 8 ++++---- .../main/scala/spark/storage/BlockManager.scala | 14 +++++++------- .../spark/storage/BlockManagerMasterActor.scala | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index bd541d4207..f10d7cc84e 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -308,10 +308,10 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with } else { // TODO: We might want to run this less often, when we are sure that something has become // runnable that wasn't before. 
- logDebug("Checking for newly runnable parent stages") - logDebug("running: " + running) - logDebug("waiting: " + waiting) - logDebug("failed: " + failed) + logTrace("Checking for newly runnable parent stages") + logTrace("running: " + running) + logTrace("waiting: " + waiting) + logTrace("failed: " + failed) val waiting2 = waiting.toArray waiting.clear() for (stage <- waiting2.sortBy(_.priority)) { diff --git a/core/src/main/scala/spark/storage/BlockManager.scala b/core/src/main/scala/spark/storage/BlockManager.scala index 1215d5f5c8..c61fd75c2b 100644 --- a/core/src/main/scala/spark/storage/BlockManager.scala +++ b/core/src/main/scala/spark/storage/BlockManager.scala @@ -243,7 +243,7 @@ class BlockManager( val startTimeMs = System.currentTimeMillis var managers = master.getLocations(blockId) val locations = managers.map(_.ip) - logDebug("Get block locations in " + Utils.getUsedTimeMs(startTimeMs)) + logDebug("Got block locations in " + Utils.getUsedTimeMs(startTimeMs)) return locations } @@ -253,7 +253,7 @@ class BlockManager( def getLocations(blockIds: Array[String]): Array[Seq[String]] = { val startTimeMs = System.currentTimeMillis val locations = master.getLocations(blockIds).map(_.map(_.ip).toSeq).toArray - logDebug("Get multiple block location in " + Utils.getUsedTimeMs(startTimeMs)) + logDebug("Got multiple block location in " + Utils.getUsedTimeMs(startTimeMs)) return locations } @@ -645,7 +645,7 @@ class BlockManager( var size = 0L myInfo.synchronized { - logDebug("Put for block " + blockId + " took " + Utils.getUsedTimeMs(startTimeMs) + logTrace("Put for block " + blockId + " took " + Utils.getUsedTimeMs(startTimeMs) + " to get into synchronized block") if (level.useMemory) { @@ -677,8 +677,10 @@ class BlockManager( } logDebug("Put block " + blockId + " locally took " + Utils.getUsedTimeMs(startTimeMs)) + // Replicate block if required if (level.replication > 1) { + val remoteStartTime = System.currentTimeMillis // Serialize the block if not already done if (bytesAfterPut == null) { if (valuesAfterPut == null) { @@ -688,12 +690,10 @@ class BlockManager( bytesAfterPut = dataSerialize(blockId, valuesAfterPut) } replicate(blockId, bytesAfterPut, level) + logDebug("Put block " + blockId + " remotely took " + Utils.getUsedTimeMs(remoteStartTime)) } - BlockManager.dispose(bytesAfterPut) - logDebug("Put block " + blockId + " took " + Utils.getUsedTimeMs(startTimeMs)) - return size } @@ -978,7 +978,7 @@ object BlockManager extends Logging { */ def dispose(buffer: ByteBuffer) { if (buffer != null && buffer.isInstanceOf[MappedByteBuffer]) { - logDebug("Unmapping " + buffer) + logTrace("Unmapping " + buffer) if (buffer.asInstanceOf[DirectBuffer].cleaner() != null) { buffer.asInstanceOf[DirectBuffer].cleaner().clean() } diff --git a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala b/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala index f88517f1a3..2830bc6297 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala @@ -115,7 +115,7 @@ class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { } def expireDeadHosts() { - logDebug("Checking for hosts with no recent heart beats in BlockManagerMaster.") + logTrace("Checking for hosts with no recent heart beats in BlockManagerMaster.") val now = System.currentTimeMillis() val minSeenTime = now - slaveTimeout val toRemove = new HashSet[BlockManagerId] From 7ee824e42ebaa1fc0b0248e0a35021108625ed14 Mon Sep 
17 00:00:00 2001 From: Patrick Wendell Date: Mon, 28 Jan 2013 21:48:32 -0800 Subject: [PATCH 221/291] Units from ms -> s --- core/src/main/scala/spark/scheduler/DAGScheduler.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index 7ba1f3430a..b8336d9d06 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -413,10 +413,10 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with def markStageAsFinished(stage: Stage) = { val serviceTime = stage.submissionTime match { - case Some(t) => (System.currentTimeMillis() - t).toString + case Some(t) => "%.03f".format((System.currentTimeMillis() - t) / 1000.0) case _ => "Unkown" } - logInfo("%s (%s) finished in %s ms".format(stage, stage.origin, serviceTime)) + logInfo("%s (%s) finished in %s s".format(stage, stage.origin, serviceTime)) running -= stage } event.reason match { From b45857c965219e2d26f35adb2ea3a2b831fdb77f Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Mon, 28 Jan 2013 23:56:56 -0600 Subject: [PATCH 222/291] Add RDD.toDebugString. Original idea by Nathan Kronenfeld. --- core/src/main/scala/spark/RDD.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 0d3857f9dd..172431c31a 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -638,4 +638,14 @@ abstract class RDD[T: ClassManifest]( protected[spark] def clearDependencies() { dependencies_ = null } + + /** A description of this RDD and its recursive dependencies for debugging. */ + def toDebugString(): String = { + def debugString(rdd: RDD[_], prefix: String = ""): Seq[String] = { + Seq(prefix + rdd) ++ rdd.dependencies.flatMap(d => debugString(d.rdd, prefix + " ")) + } + debugString(this).mkString("\n") + } + + override def toString() = "%s[%d] at %s".format(getClass.getSimpleName, id, origin) } From 951cfd9ba2a9239a777f156f10af820e9df49606 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 29 Jan 2013 00:02:17 -0600 Subject: [PATCH 223/291] Add JavaRDDLike.toDebugString(). --- core/src/main/scala/spark/api/java/JavaRDDLike.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/src/main/scala/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/spark/api/java/JavaRDDLike.scala index 4c95c989b5..44f778e5c2 100644 --- a/core/src/main/scala/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/spark/api/java/JavaRDDLike.scala @@ -330,4 +330,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends PairFlatMapWorkaround case _ => Optional.absent() } } + + /** A description of this RDD and its recursive dependencies for debugging. */ + def toDebugString(): String = { + rdd.toDebugString() + } } From 3cda14af3fea97c2372c7335505e9dad7e0dd117 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 29 Jan 2013 00:12:31 -0600 Subject: [PATCH 224/291] Add number of splits. 
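Together with PATCH 222 and 223 above, this change makes toDebugString() print one line per recursive dependency, now including each RDD's split count. A minimal, illustrative driver sketch of calling it (the object name and the word-count pipeline are hypothetical; only toDebugString comes from these patches):

    import spark.SparkContext
    import spark.SparkContext._  // implicit conversion to pair-RDD functions

    object DebugStringExample {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "DebugStringExample")
        val words = sc.parallelize(Seq("a", "b", "a"), 2)
        val counts = words.map(w => (w, 1)).reduceByKey(_ + _)
        // Prints this RDD and each dependency on its own indented line,
        // with the number of splits added in this patch
        println(counts.toDebugString())
        sc.stop()
      }
    }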
--- core/src/main/scala/spark/RDD.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 172431c31a..39bacd2afb 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -642,7 +642,8 @@ abstract class RDD[T: ClassManifest]( /** A description of this RDD and its recursive dependencies for debugging. */ def toDebugString(): String = { def debugString(rdd: RDD[_], prefix: String = ""): Seq[String] = { - Seq(prefix + rdd) ++ rdd.dependencies.flatMap(d => debugString(d.rdd, prefix + " ")) + Seq(prefix + rdd + " (" + rdd.splits.size + " splits)") ++ + rdd.dependencies.flatMap(d => debugString(d.rdd, prefix + " ")) } debugString(this).mkString("\n") } From cbf72bffa5874319c7ee7117a073e9d01fa51585 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 29 Jan 2013 00:20:36 -0600 Subject: [PATCH 225/291] Include name, if set, in RDD.toString(). --- core/src/main/scala/spark/RDD.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 39bacd2afb..a23441483e 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -648,5 +648,10 @@ abstract class RDD[T: ClassManifest]( debugString(this).mkString("\n") } - override def toString() = "%s[%d] at %s".format(getClass.getSimpleName, id, origin) + override def toString(): String = "%s%s[%d] at %s".format( + Option(name).map(_ + " ").getOrElse(""), + getClass.getSimpleName, + id, + origin) + } From b29599e5cf0272f0d0e3ceceebb473a8163eab8c Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 28 Jan 2013 22:24:47 -0800 Subject: [PATCH 226/291] Fix code that depended on metadata cleaner interval being in minutes --- streaming/src/main/scala/spark/streaming/DStream.scala | 8 ++++---- .../src/main/scala/spark/streaming/StreamingContext.scala | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/DStream.scala b/streaming/src/main/scala/spark/streaming/DStream.scala index b11ef443dc..352f83fe0c 100644 --- a/streaming/src/main/scala/spark/streaming/DStream.scala +++ b/streaming/src/main/scala/spark/streaming/DStream.scala @@ -198,10 +198,10 @@ abstract class DStream[T: ClassManifest] ( metadataCleanerDelay < 0 || rememberDuration.milliseconds < metadataCleanerDelay * 1000, "It seems you are doing some DStream window operation or setting a checkpoint interval " + "which requires " + this.getClass.getSimpleName + " to remember generated RDDs for more " + - "than " + rememberDuration.milliseconds + " milliseconds. But the Spark's metadata cleanup" + - "delay is set to " + (metadataCleanerDelay / 60.0) + " minutes, which is not sufficient. Please set " + - "the Java property 'spark.cleaner.delay' to more than " + - math.ceil(rememberDuration.milliseconds.toDouble / 60000.0).toInt + " minutes." + "than " + rememberDuration.milliseconds / 1000 + " seconds. But Spark's metadata cleanup" + + "delay is set to " + metadataCleanerDelay + " seconds, which is not sufficient. Please " + + "set the Java property 'spark.cleaner.delay' to more than " + + math.ceil(rememberDuration.milliseconds / 1000.0).toInt + " seconds." 
) dependencies.foreach(_.validate()) diff --git a/streaming/src/main/scala/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/spark/streaming/StreamingContext.scala index 14500bdcb1..37ba524b48 100644 --- a/streaming/src/main/scala/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/spark/streaming/StreamingContext.scala @@ -389,7 +389,7 @@ object StreamingContext { // Set the default cleaner delay to an hour if not already set. // This should be sufficient for even 1 second interval. if (MetadataCleaner.getDelaySeconds < 0) { - MetadataCleaner.setDelaySeconds(60) + MetadataCleaner.setDelaySeconds(3600) } new SparkContext(master, frameworkName) } From 64ba6a8c2c5f46e6de6deb6a6fd576a55cb3b198 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 28 Jan 2013 22:30:12 -0800 Subject: [PATCH 227/291] Simplify checkpointing code and RDD class a little: - RDD's getDependencies and getSplits methods are now guaranteed to be called only once, so subclasses can safely do computation in there without worrying about caching the results. - The management of a "splits_" variable that is cleared out when we checkpoint an RDD is now done in the RDD class. - A few of the RDD subclasses are simpler. - CheckpointRDD's compute() method no longer assumes that it is given a CheckpointRDDSplit -- it can work just as well on a split from the original RDD, because it only looks at its index. This is important because things like UnionRDD and ZippedRDD remember the parent's splits as part of their own and wouldn't work on checkpointed parents. - RDD.iterator can now reuse cached data if an RDD is computed before it is checkpointed. It seems like it wouldn't do this before (it always called iterator() on the CheckpointRDD, which read from HDFS). --- core/src/main/scala/spark/CacheManager.scala | 6 +- .../main/scala/spark/PairRDDFunctions.scala | 4 +- core/src/main/scala/spark/RDD.scala | 130 ++++++++++-------- .../main/scala/spark/RDDCheckpointData.scala | 19 +-- .../scala/spark/api/java/JavaRDDLike.scala | 2 +- .../main/scala/spark/rdd/CartesianRDD.scala | 12 +- .../main/scala/spark/rdd/CheckpointRDD.scala | 61 ++++---- .../main/scala/spark/rdd/CoalescedRDD.scala | 14 +- core/src/main/scala/spark/rdd/MappedRDD.scala | 6 +- .../scala/spark/rdd/PartitionPruningRDD.scala | 13 +- .../main/scala/spark/rdd/ShuffledRDD.scala | 8 +- core/src/main/scala/spark/rdd/UnionRDD.scala | 14 +- core/src/main/scala/spark/rdd/ZippedRDD.scala | 7 +- .../scala/spark/util/MetadataCleaner.scala | 4 +- .../test/scala/spark/CheckpointSuite.scala | 21 +-- 15 files changed, 153 insertions(+), 168 deletions(-) diff --git a/core/src/main/scala/spark/CacheManager.scala b/core/src/main/scala/spark/CacheManager.scala index a0b53fd9d6..711435c333 100644 --- a/core/src/main/scala/spark/CacheManager.scala +++ b/core/src/main/scala/spark/CacheManager.scala @@ -10,9 +10,9 @@ import spark.storage.{BlockManager, StorageLevel} private[spark] class CacheManager(blockManager: BlockManager) extends Logging { private val loading = new HashSet[String] - /** Gets or computes an RDD split. Used by RDD.iterator() when a RDD is cached. */ + /** Gets or computes an RDD split. Used by RDD.iterator() when an RDD is cached. 
*/ def getOrCompute[T](rdd: RDD[T], split: Split, context: TaskContext, storageLevel: StorageLevel) - : Iterator[T] = { + : Iterator[T] = { val key = "rdd_%d_%d".format(rdd.id, split.index) logInfo("Cache key is " + key) blockManager.get(key) match { @@ -50,7 +50,7 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging { // If we got here, we have to load the split val elements = new ArrayBuffer[Any] logInfo("Computing partition " + split) - elements ++= rdd.compute(split, context) + elements ++= rdd.computeOrReadCheckpoint(split, context) // Try to put this block in the blockManager blockManager.put(key, elements, storageLevel, true) return elements.iterator.asInstanceOf[Iterator[T]] diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala index 53b051f1c5..231e23a7de 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/spark/PairRDDFunctions.scala @@ -649,9 +649,7 @@ class OrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest]( } private[spark] -class MappedValuesRDD[K, V, U](prev: RDD[(K, V)], f: V => U) - extends RDD[(K, U)](prev) { - +class MappedValuesRDD[K, V, U](prev: RDD[(K, V)], f: V => U) extends RDD[(K, U)](prev) { override def getSplits = firstParent[(K, V)].splits override val partitioner = firstParent[(K, V)].partitioner override def compute(split: Split, context: TaskContext) = diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 0d3857f9dd..dbad6d4c83 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -1,27 +1,17 @@ package spark -import java.io.{ObjectOutputStream, IOException, EOFException, ObjectInputStream} import java.net.URL import java.util.{Date, Random} import java.util.{HashMap => JHashMap} -import java.util.concurrent.atomic.AtomicLong import scala.collection.Map import scala.collection.JavaConversions.mapAsScalaMap import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap -import org.apache.hadoop.fs.Path import org.apache.hadoop.io.BytesWritable import org.apache.hadoop.io.NullWritable import org.apache.hadoop.io.Text -import org.apache.hadoop.io.Writable -import org.apache.hadoop.mapred.FileOutputCommitter -import org.apache.hadoop.mapred.HadoopWriter -import org.apache.hadoop.mapred.JobConf -import org.apache.hadoop.mapred.OutputCommitter -import org.apache.hadoop.mapred.OutputFormat -import org.apache.hadoop.mapred.SequenceFileOutputFormat import org.apache.hadoop.mapred.TextOutputFormat import it.unimi.dsi.fastutil.objects.{Object2LongOpenHashMap => OLMap} @@ -30,7 +20,6 @@ import spark.partial.BoundedDouble import spark.partial.CountEvaluator import spark.partial.GroupedCountEvaluator import spark.partial.PartialResult -import spark.rdd.BlockRDD import spark.rdd.CartesianRDD import spark.rdd.FilteredRDD import spark.rdd.FlatMappedRDD @@ -73,11 +62,11 @@ import SparkContext._ * on RDD internals. 
*/ abstract class RDD[T: ClassManifest]( - @transient var sc: SparkContext, - var dependencies_ : List[Dependency[_]] + @transient private var sc: SparkContext, + @transient private var deps: Seq[Dependency[_]] ) extends Serializable with Logging { - + /** Construct an RDD with just a one-to-one dependency on one parent */ def this(@transient oneParent: RDD[_]) = this(oneParent.context , List(new OneToOneDependency(oneParent))) @@ -85,25 +74,27 @@ abstract class RDD[T: ClassManifest]( // Methods that should be implemented by subclasses of RDD // ======================================================================= - /** Function for computing a given partition. */ + /** Implemented by subclasses to compute a given partition. */ def compute(split: Split, context: TaskContext): Iterator[T] - /** Set of partitions in this RDD. */ - protected def getSplits(): Array[Split] + /** + * Implemented by subclasses to return the set of partitions in this RDD. This method will only + * be called once, so it is safe to implement a time-consuming computation in it. + */ + protected def getSplits: Array[Split] - /** How this RDD depends on any parent RDDs. */ - protected def getDependencies(): List[Dependency[_]] = dependencies_ + /** + * Implemented by subclasses to return how this RDD depends on parent RDDs. This method will only + * be called once, so it is safe to implement a time-consuming computation in it. + */ + protected def getDependencies: Seq[Dependency[_]] = deps - /** A friendly name for this RDD */ - var name: String = null - /** Optionally overridden by subclasses to specify placement preferences. */ protected def getPreferredLocations(split: Split): Seq[String] = Nil /** Optionally overridden by subclasses to specify how they are partitioned. */ val partitioner: Option[Partitioner] = None - // ======================================================================= // Methods and fields available on all RDDs // ======================================================================= @@ -111,13 +102,16 @@ abstract class RDD[T: ClassManifest]( /** A unique ID for this RDD (within its SparkContext). */ val id = sc.newRddId() + /** A friendly name for this RDD */ + var name: String = null + /** Assign a name to this RDD */ def setName(_name: String) = { name = _name this } - /** + /** * Set this RDD's storage level to persist its values across operations after the first time * it is computed. Can only be called once on each RDD. */ @@ -142,15 +136,24 @@ abstract class RDD[T: ClassManifest]( /** Get the RDD's current storage level, or StorageLevel.NONE if none is set. */ def getStorageLevel = storageLevel + // Our dependencies and splits will be gotten by calling subclass's methods below, and will + // be overwritten when we're checkpointed + private var dependencies_ : Seq[Dependency[_]] = null + @transient private var splits_ : Array[Split] = null + + /** An Option holding our checkpoint RDD, if we are checkpointed */ + private def checkpointRDD: Option[RDD[T]] = checkpointData.flatMap(_.checkpointRDD) + /** - * Get the preferred location of a split, taking into account whether the + * Get the list of dependencies of this RDD, taking into account whether the * RDD is checkpointed or not. 
*/ - final def preferredLocations(split: Split): Seq[String] = { - if (isCheckpointed) { - checkpointData.get.getPreferredLocations(split) - } else { - getPreferredLocations(split) + final def dependencies: Seq[Dependency[_]] = { + checkpointRDD.map(r => List(new OneToOneDependency(r))).getOrElse { + if (dependencies_ == null) { + dependencies_ = getDependencies + } + dependencies_ } } @@ -159,22 +162,21 @@ abstract class RDD[T: ClassManifest]( * RDD is checkpointed or not. */ final def splits: Array[Split] = { - if (isCheckpointed) { - checkpointData.get.getSplits - } else { - getSplits + checkpointRDD.map(_.splits).getOrElse { + if (splits_ == null) { + splits_ = getSplits + } + splits_ } } /** - * Get the list of dependencies of this RDD, taking into account whether the + * Get the preferred location of a split, taking into account whether the * RDD is checkpointed or not. */ - final def dependencies: List[Dependency[_]] = { - if (isCheckpointed) { - dependencies_ - } else { - getDependencies + final def preferredLocations(split: Split): Seq[String] = { + checkpointRDD.map(_.getPreferredLocations(split)).getOrElse { + getPreferredLocations(split) } } @@ -184,10 +186,19 @@ abstract class RDD[T: ClassManifest]( * subclasses of RDD. */ final def iterator(split: Split, context: TaskContext): Iterator[T] = { - if (isCheckpointed) { - checkpointData.get.iterator(split, context) - } else if (storageLevel != StorageLevel.NONE) { + if (storageLevel != StorageLevel.NONE) { SparkEnv.get.cacheManager.getOrCompute(this, split, context, storageLevel) + } else { + computeOrReadCheckpoint(split, context) + } + } + + /** + * Compute an RDD partition or read it from a checkpoint if the RDD is checkpointing. + */ + private[spark] def computeOrReadCheckpoint(split: Split, context: TaskContext): Iterator[T] = { + if (isCheckpointed) { + firstParent[T].iterator(split, context) } else { compute(split, context) } @@ -578,15 +589,15 @@ abstract class RDD[T: ClassManifest]( /** * Return whether this RDD has been checkpointed or not */ - def isCheckpointed(): Boolean = { - if (checkpointData.isDefined) checkpointData.get.isCheckpointed() else false + def isCheckpointed: Boolean = { + checkpointData.map(_.isCheckpointed).getOrElse(false) } /** * Gets the name of the file to which this RDD was checkpointed */ - def getCheckpointFile(): Option[String] = { - if (checkpointData.isDefined) checkpointData.get.getCheckpointFile() else None + def getCheckpointFile: Option[String] = { + checkpointData.flatMap(_.getCheckpointFile) } // ======================================================================= @@ -611,31 +622,36 @@ abstract class RDD[T: ClassManifest]( def context = sc /** - * Performs the checkpointing of this RDD by saving this . It is called by the DAGScheduler + * Performs the checkpointing of this RDD by saving this. It is called by the DAGScheduler * after a job using this RDD has completed (therefore the RDD has been materialized and * potentially stored in memory). doCheckpoint() is called recursively on the parent RDDs. */ - protected[spark] def doCheckpoint() { - if (checkpointData.isDefined) checkpointData.get.doCheckpoint() - dependencies.foreach(_.rdd.doCheckpoint()) + private[spark] def doCheckpoint() { + if (checkpointData.isDefined) { + checkpointData.get.doCheckpoint() + } else { + dependencies.foreach(_.rdd.doCheckpoint()) + } } /** - * Changes the dependencies of this RDD from its original parents to the new RDD - * (`newRDD`) created from the checkpoint file. 
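// Illustrative sketch, not part of the patch: how the checkpoint-aware accessors above behave
// from the driver side. setCheckpointDir and the local path are assumptions for this example;
// makeRDD, checkpoint(), count(), isCheckpointed and getCheckpointFile are the APIs shown in
// this diff.
import spark.SparkContext

object CheckpointUsageSketch {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "CheckpointUsageSketch")
    sc.setCheckpointDir("/tmp/spark-checkpoints")   // assumed writable scratch directory
    val rdd = sc.makeRDD(1 to 1000, 10).map(_ * 2)
    rdd.checkpoint()             // only marks the RDD; nothing is written yet
    rdd.count()                  // the job finishes, then the DAGScheduler calls doCheckpoint()
    assert(rdd.isCheckpointed)   // splits/dependencies now come from the CheckpointRDD parent
    println(rdd.getCheckpointFile)
    sc.stop()
  }
}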
+ * Changes the dependencies of this RDD from its original parents to a new RDD (`newRDD`) + * created from the checkpoint file, and forget its old dependencies and splits. */ - protected[spark] def changeDependencies(newRDD: RDD[_]) { + private[spark] def markCheckpointed(checkpointRDD: RDD[_]) { clearDependencies() - dependencies_ = List(new OneToOneDependency(newRDD)) + dependencies_ = null + splits_ = null + deps = null // Forget the constructor argument for dependencies too } /** * Clears the dependencies of this RDD. This method must ensure that all references * to the original parent RDDs is removed to enable the parent RDDs to be garbage * collected. Subclasses of RDD may override this method for implementing their own cleaning - * logic. See [[spark.rdd.UnionRDD]] and [[spark.rdd.ShuffledRDD]] to get a better idea. + * logic. See [[spark.rdd.UnionRDD]] for an example. */ - protected[spark] def clearDependencies() { + protected def clearDependencies() { dependencies_ = null } } diff --git a/core/src/main/scala/spark/RDDCheckpointData.scala b/core/src/main/scala/spark/RDDCheckpointData.scala index 18df530b7d..a4a4ebaf53 100644 --- a/core/src/main/scala/spark/RDDCheckpointData.scala +++ b/core/src/main/scala/spark/RDDCheckpointData.scala @@ -20,7 +20,7 @@ private[spark] object CheckpointState extends Enumeration { * of the checkpointed RDD. */ private[spark] class RDDCheckpointData[T: ClassManifest](rdd: RDD[T]) -extends Logging with Serializable { + extends Logging with Serializable { import CheckpointState._ @@ -31,7 +31,7 @@ extends Logging with Serializable { @transient var cpFile: Option[String] = None // The CheckpointRDD created from the checkpoint file, that is, the new parent the associated RDD. - @transient var cpRDD: Option[RDD[T]] = None + var cpRDD: Option[RDD[T]] = None // Mark the RDD for checkpointing def markForCheckpoint() { @@ -41,12 +41,12 @@ extends Logging with Serializable { } // Is the RDD already checkpointed - def isCheckpointed(): Boolean = { + def isCheckpointed: Boolean = { RDDCheckpointData.synchronized { cpState == Checkpointed } } // Get the file to which this RDD was checkpointed to as an Option - def getCheckpointFile(): Option[String] = { + def getCheckpointFile: Option[String] = { RDDCheckpointData.synchronized { cpFile } } @@ -71,7 +71,7 @@ extends Logging with Serializable { RDDCheckpointData.synchronized { cpFile = Some(path) cpRDD = Some(newRDD) - rdd.changeDependencies(newRDD) + rdd.markCheckpointed(newRDD) // Update the RDD's dependencies and splits cpState = Checkpointed RDDCheckpointData.clearTaskCaches() logInfo("Done checkpointing RDD " + rdd.id + ", new parent is RDD " + newRDD.id) @@ -79,7 +79,7 @@ extends Logging with Serializable { } // Get preferred location of a split after checkpointing - def getPreferredLocations(split: Split) = { + def getPreferredLocations(split: Split): Seq[String] = { RDDCheckpointData.synchronized { cpRDD.get.preferredLocations(split) } @@ -91,9 +91,10 @@ extends Logging with Serializable { } } - // Get iterator. This is called at the worker nodes. 
- def iterator(split: Split, context: TaskContext): Iterator[T] = { - rdd.firstParent[T].iterator(split, context) + def checkpointRDD: Option[RDD[T]] = { + RDDCheckpointData.synchronized { + cpRDD + } } } diff --git a/core/src/main/scala/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/spark/api/java/JavaRDDLike.scala index 4c95c989b5..46fd8fe85e 100644 --- a/core/src/main/scala/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/spark/api/java/JavaRDDLike.scala @@ -319,7 +319,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends PairFlatMapWorkaround /** * Return whether this RDD has been checkpointed or not */ - def isCheckpointed(): Boolean = rdd.isCheckpointed() + def isCheckpointed: Boolean = rdd.isCheckpointed /** * Gets the name of the file to which this RDD was checkpointed diff --git a/core/src/main/scala/spark/rdd/CartesianRDD.scala b/core/src/main/scala/spark/rdd/CartesianRDD.scala index 453d410ad4..0f9ca06531 100644 --- a/core/src/main/scala/spark/rdd/CartesianRDD.scala +++ b/core/src/main/scala/spark/rdd/CartesianRDD.scala @@ -1,7 +1,7 @@ package spark.rdd import java.io.{ObjectOutputStream, IOException} -import spark.{OneToOneDependency, NarrowDependency, RDD, SparkContext, Split, TaskContext} +import spark._ private[spark] @@ -35,7 +35,7 @@ class CartesianRDD[T: ClassManifest, U:ClassManifest]( val numSplitsInRdd2 = rdd2.splits.size - @transient var splits_ = { + override def getSplits: Array[Split] = { // create the cross product split val array = new Array[Split](rdd1.splits.size * rdd2.splits.size) for (s1 <- rdd1.splits; s2 <- rdd2.splits) { @@ -45,8 +45,6 @@ class CartesianRDD[T: ClassManifest, U:ClassManifest]( array } - override def getSplits = splits_ - override def getPreferredLocations(split: Split) = { val currSplit = split.asInstanceOf[CartesianSplit] rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2) @@ -58,7 +56,7 @@ class CartesianRDD[T: ClassManifest, U:ClassManifest]( y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } - var deps_ = List( + override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numSplitsInRdd2) }, @@ -67,11 +65,7 @@ class CartesianRDD[T: ClassManifest, U:ClassManifest]( } ) - override def getDependencies = deps_ - override def clearDependencies() { - deps_ = Nil - splits_ = null rdd1 = null rdd2 = null } diff --git a/core/src/main/scala/spark/rdd/CheckpointRDD.scala b/core/src/main/scala/spark/rdd/CheckpointRDD.scala index 6f00f6ac73..96b593ba7c 100644 --- a/core/src/main/scala/spark/rdd/CheckpointRDD.scala +++ b/core/src/main/scala/spark/rdd/CheckpointRDD.scala @@ -9,23 +9,26 @@ import org.apache.hadoop.fs.Path import java.io.{File, IOException, EOFException} import java.text.NumberFormat -private[spark] class CheckpointRDDSplit(idx: Int, val splitFile: String) extends Split { - override val index: Int = idx -} +private[spark] class CheckpointRDDSplit(val index: Int) extends Split {} /** * This RDD represents a RDD checkpoint file (similar to HadoopRDD). 
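// Illustrative sketch, not part of the patch: the shape that the CartesianRDD conversion above,
// and the CoalescedRDD, UnionRDD and ZippedRDD conversions later in this commit, converge on.
// Splits and dependencies are only described via getSplits/getDependencies; the base RDD caches
// the results and drops them once the RDD is checkpointed. The RDD below and its doubling logic
// are invented for illustration.
package spark.rdd

import spark.{Dependency, OneToOneDependency, RDD, Split, TaskContext}

private[spark] class TimesTwoRDD(var prev: RDD[Int])
  extends RDD[Int](prev.context, Nil) {   // Nil: dependencies come from getDependencies

  override def getSplits: Array[Split] = firstParent[Int].splits

  override def getDependencies: Seq[Dependency[_]] = List(new OneToOneDependency(prev))

  override def compute(split: Split, context: TaskContext): Iterator[Int] =
    firstParent[Int].iterator(split, context).map(_ * 2)

  override def clearDependencies() {
    prev = null   // drop the parent reference so a checkpointed parent can be garbage collected
  }
}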
*/ private[spark] -class CheckpointRDD[T: ClassManifest](sc: SparkContext, checkpointPath: String) +class CheckpointRDD[T: ClassManifest](sc: SparkContext, val checkpointPath: String) extends RDD[T](sc, Nil) { - @transient val path = new Path(checkpointPath) - @transient val fs = path.getFileSystem(new Configuration()) + @transient val fs = new Path(checkpointPath).getFileSystem(sc.hadoopConfiguration) @transient val splits_ : Array[Split] = { - val splitFiles = fs.listStatus(path).map(_.getPath.toString).filter(_.contains("part-")).sorted - splitFiles.zipWithIndex.map(x => new CheckpointRDDSplit(x._2, x._1)).toArray + val dirContents = fs.listStatus(new Path(checkpointPath)) + val splitFiles = dirContents.map(_.getPath.toString).filter(_.contains("part-")).sorted + val numSplits = splitFiles.size + if (!splitFiles(0).endsWith(CheckpointRDD.splitIdToFile(0)) || + !splitFiles(numSplits-1).endsWith(CheckpointRDD.splitIdToFile(numSplits-1))) { + throw new SparkException("Invalid checkpoint directory: " + checkpointPath) + } + Array.tabulate(numSplits)(i => new CheckpointRDDSplit(i)) } checkpointData = Some(new RDDCheckpointData[T](this)) @@ -34,36 +37,34 @@ class CheckpointRDD[T: ClassManifest](sc: SparkContext, checkpointPath: String) override def getSplits = splits_ override def getPreferredLocations(split: Split): Seq[String] = { - val status = fs.getFileStatus(path) + val status = fs.getFileStatus(new Path(checkpointPath)) val locations = fs.getFileBlockLocations(status, 0, status.getLen) - locations.firstOption.toList.flatMap(_.getHosts).filter(_ != "localhost") + locations.headOption.toList.flatMap(_.getHosts).filter(_ != "localhost") } override def compute(split: Split, context: TaskContext): Iterator[T] = { - CheckpointRDD.readFromFile(split.asInstanceOf[CheckpointRDDSplit].splitFile, context) + val file = new Path(checkpointPath, CheckpointRDD.splitIdToFile(split.index)) + CheckpointRDD.readFromFile(file, context) } override def checkpoint() { - // Do nothing. Hadoop RDD should not be checkpointed. + // Do nothing. CheckpointRDD should not be checkpointed. } } private[spark] object CheckpointRDD extends Logging { - def splitIdToFileName(splitId: Int): String = { - val numfmt = NumberFormat.getInstance() - numfmt.setMinimumIntegerDigits(5) - numfmt.setGroupingUsed(false) - "part-" + numfmt.format(splitId) + def splitIdToFile(splitId: Int): String = { + "part-%05d".format(splitId) } - def writeToFile[T](path: String, blockSize: Int = -1)(context: TaskContext, iterator: Iterator[T]) { + def writeToFile[T](path: String, blockSize: Int = -1)(ctx: TaskContext, iterator: Iterator[T]) { val outputDir = new Path(path) val fs = outputDir.getFileSystem(new Configuration()) - val finalOutputName = splitIdToFileName(context.splitId) + val finalOutputName = splitIdToFile(ctx.splitId) val finalOutputPath = new Path(outputDir, finalOutputName) - val tempOutputPath = new Path(outputDir, "." + finalOutputName + "-attempt-" + context.attemptId) + val tempOutputPath = new Path(outputDir, "." 
+ finalOutputName + "-attempt-" + ctx.attemptId) if (fs.exists(tempOutputPath)) { throw new IOException("Checkpoint failed: temporary path " + @@ -83,22 +84,22 @@ private[spark] object CheckpointRDD extends Logging { serializeStream.close() if (!fs.rename(tempOutputPath, finalOutputPath)) { - if (!fs.delete(finalOutputPath, true)) { - throw new IOException("Checkpoint failed: failed to delete earlier output of task " - + context.attemptId) - } - if (!fs.rename(tempOutputPath, finalOutputPath)) { + if (!fs.exists(finalOutputPath)) { + fs.delete(tempOutputPath, false) throw new IOException("Checkpoint failed: failed to save output of task: " - + context.attemptId) + + ctx.attemptId + " and final output path does not exist") + } else { + // Some other copy of this task must've finished before us and renamed it + logInfo("Final output path " + finalOutputPath + " already exists; not overwriting it") + fs.delete(tempOutputPath, false) } } } - def readFromFile[T](path: String, context: TaskContext): Iterator[T] = { - val inputPath = new Path(path) - val fs = inputPath.getFileSystem(new Configuration()) + def readFromFile[T](path: Path, context: TaskContext): Iterator[T] = { + val fs = path.getFileSystem(new Configuration()) val bufferSize = System.getProperty("spark.buffer.size", "65536").toInt - val fileInputStream = fs.open(inputPath, bufferSize) + val fileInputStream = fs.open(path, bufferSize) val serializer = SparkEnv.get.serializer.newInstance() val deserializeStream = serializer.deserializeStream(fileInputStream) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index 167755bbba..4c57434b65 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -27,11 +27,11 @@ private[spark] case class CoalescedRDDSplit( * or to avoid having a large number of small tasks when processing a directory with many files. 
*/ class CoalescedRDD[T: ClassManifest]( - var prev: RDD[T], + @transient var prev: RDD[T], maxPartitions: Int) - extends RDD[T](prev.context, Nil) { // Nil, so the dependencies_ var does not refer to parent RDDs + extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies - @transient var splits_ : Array[Split] = { + override def getSplits: Array[Split] = { val prevSplits = prev.splits if (prevSplits.length < maxPartitions) { prevSplits.map(_.index).map{idx => new CoalescedRDDSplit(idx, prev, Array(idx)) } @@ -44,26 +44,20 @@ class CoalescedRDD[T: ClassManifest]( } } - override def getSplits = splits_ - override def compute(split: Split, context: TaskContext): Iterator[T] = { split.asInstanceOf[CoalescedRDDSplit].parents.iterator.flatMap { parentSplit => firstParent[T].iterator(parentSplit, context) } } - var deps_ : List[Dependency[_]] = List( + override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(prev) { def getParents(id: Int): Seq[Int] = splits(id).asInstanceOf[CoalescedRDDSplit].parentsIndices } ) - override def getDependencies() = deps_ - override def clearDependencies() { - deps_ = Nil - splits_ = null prev = null } } diff --git a/core/src/main/scala/spark/rdd/MappedRDD.scala b/core/src/main/scala/spark/rdd/MappedRDD.scala index c6ceb272cd..5466c9c657 100644 --- a/core/src/main/scala/spark/rdd/MappedRDD.scala +++ b/core/src/main/scala/spark/rdd/MappedRDD.scala @@ -3,13 +3,11 @@ package spark.rdd import spark.{RDD, Split, TaskContext} private[spark] -class MappedRDD[U: ClassManifest, T: ClassManifest]( - prev: RDD[T], - f: T => U) +class MappedRDD[U: ClassManifest, T: ClassManifest](prev: RDD[T], f: T => U) extends RDD[U](prev) { override def getSplits = firstParent[T].splits override def compute(split: Split, context: TaskContext) = firstParent[T].iterator(split, context).map(f) -} \ No newline at end of file +} diff --git a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala index 97dd37950e..b8482338c6 100644 --- a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala +++ b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala @@ -7,23 +7,18 @@ import spark.{PruneDependency, RDD, SparkEnv, Split, TaskContext} * all partitions. An example use case: If we know the RDD is partitioned by range, * and the execution DAG has a filter on the key, we can avoid launching tasks * on partitions that don't have the range covering the key. + * + * TODO: This currently doesn't give partition IDs properly! 
*/ class PartitionPruningRDD[T: ClassManifest]( @transient prev: RDD[T], @transient partitionFilterFunc: Int => Boolean) extends RDD[T](prev.context, List(new PruneDependency(prev, partitionFilterFunc))) { - @transient - var partitions_ : Array[Split] = dependencies_.head.asInstanceOf[PruneDependency[T]].partitions - override def compute(split: Split, context: TaskContext) = firstParent[T].iterator(split, context) - override protected def getSplits = partitions_ + override protected def getSplits = + getDependencies.head.asInstanceOf[PruneDependency[T]].partitions override val partitioner = firstParent[T].partitioner - - override def clearDependencies() { - super.clearDependencies() - partitions_ = null - } } diff --git a/core/src/main/scala/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/spark/rdd/ShuffledRDD.scala index 28ff19876d..d396478673 100644 --- a/core/src/main/scala/spark/rdd/ShuffledRDD.scala +++ b/core/src/main/scala/spark/rdd/ShuffledRDD.scala @@ -22,16 +22,10 @@ class ShuffledRDD[K, V]( override val partitioner = Some(part) - @transient var splits_ = Array.tabulate[Split](part.numPartitions)(i => new ShuffledRDDSplit(i)) - - override def getSplits = splits_ + override def getSplits = Array.tabulate[Split](part.numPartitions)(i => new ShuffledRDDSplit(i)) override def compute(split: Split, context: TaskContext): Iterator[(K, V)] = { val shuffledId = dependencies.head.asInstanceOf[ShuffleDependency[K, V]].shuffleId SparkEnv.get.shuffleFetcher.fetch[K, V](shuffledId, split.index) } - - override def clearDependencies() { - splits_ = null - } } diff --git a/core/src/main/scala/spark/rdd/UnionRDD.scala b/core/src/main/scala/spark/rdd/UnionRDD.scala index 82f0a44ecd..26a2d511f2 100644 --- a/core/src/main/scala/spark/rdd/UnionRDD.scala +++ b/core/src/main/scala/spark/rdd/UnionRDD.scala @@ -26,9 +26,9 @@ private[spark] class UnionSplit[T: ClassManifest](idx: Int, rdd: RDD[T], splitIn class UnionRDD[T: ClassManifest]( sc: SparkContext, @transient var rdds: Seq[RDD[T]]) - extends RDD[T](sc, Nil) { // Nil, so the dependencies_ var does not refer to parent RDDs + extends RDD[T](sc, Nil) { // Nil since we implement getDependencies - @transient var splits_ : Array[Split] = { + override def getSplits: Array[Split] = { val array = new Array[Split](rdds.map(_.splits.size).sum) var pos = 0 for (rdd <- rdds; split <- rdd.splits) { @@ -38,20 +38,16 @@ class UnionRDD[T: ClassManifest]( array } - override def getSplits = splits_ - - @transient var deps_ = { + override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.splits.size) pos += rdd.splits.size } - deps.toList + deps } - override def getDependencies = deps_ - override def compute(s: Split, context: TaskContext): Iterator[T] = s.asInstanceOf[UnionSplit[T]].iterator(context) @@ -59,8 +55,6 @@ class UnionRDD[T: ClassManifest]( s.asInstanceOf[UnionSplit[T]].preferredLocations() override def clearDependencies() { - deps_ = null - splits_ = null rdds = null } } diff --git a/core/src/main/scala/spark/rdd/ZippedRDD.scala b/core/src/main/scala/spark/rdd/ZippedRDD.scala index d950b06c85..e5df6d8c72 100644 --- a/core/src/main/scala/spark/rdd/ZippedRDD.scala +++ b/core/src/main/scala/spark/rdd/ZippedRDD.scala @@ -32,9 +32,7 @@ class ZippedRDD[T: ClassManifest, U: ClassManifest]( extends RDD[(T, U)](sc, List(new OneToOneDependency(rdd1), new OneToOneDependency(rdd2))) with Serializable { - // TODO: FIX THIS. 
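// Worked example, not part of the patch, of the index arithmetic in UnionRDD.getDependencies
// above. Suppose rddA has 3 splits and rddB has 2; the union then has 5 splits and the loop
// produces
//
//   new RangeDependency(rddA, 0, 0, 3)   // union splits 0..2 read rddA splits 0..2
//   new RangeDependency(rddB, 0, 3, 2)   // union splits 3..4 read rddB splits 0..1
//
// where the third argument is the union-side starting index (`pos`) and the fourth is the
// number of splits that parent contributes. Union split i therefore maps back to (rddA, i)
// for i < 3 and to (rddB, i - 3) otherwise.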
- - @transient var splits_ : Array[Split] = { + override def getSplits: Array[Split] = { if (rdd1.splits.size != rdd2.splits.size) { throw new IllegalArgumentException("Can't zip RDDs with unequal numbers of partitions") } @@ -45,8 +43,6 @@ class ZippedRDD[T: ClassManifest, U: ClassManifest]( array } - override def getSplits = splits_ - override def compute(s: Split, context: TaskContext): Iterator[(T, U)] = { val (split1, split2) = s.asInstanceOf[ZippedSplit[T, U]].splits rdd1.iterator(split1, context).zip(rdd2.iterator(split2, context)) @@ -58,7 +54,6 @@ class ZippedRDD[T: ClassManifest, U: ClassManifest]( } override def clearDependencies() { - splits_ = null rdd1 = null rdd2 = null } diff --git a/core/src/main/scala/spark/util/MetadataCleaner.scala b/core/src/main/scala/spark/util/MetadataCleaner.scala index 6cf93a9b17..eaff7ae581 100644 --- a/core/src/main/scala/spark/util/MetadataCleaner.scala +++ b/core/src/main/scala/spark/util/MetadataCleaner.scala @@ -26,8 +26,8 @@ class MetadataCleaner(name: String, cleanupFunc: (Long) => Unit) extends Logging if (delaySeconds > 0) { logDebug( - "Starting metadata cleaner for " + name + " with delay of " + delaySeconds + " seconds and " - + "period of " + periodSeconds + " secs") + "Starting metadata cleaner for " + name + " with delay of " + delaySeconds + " seconds " + + "and period of " + periodSeconds + " secs") timer.schedule(task, periodSeconds * 1000, periodSeconds * 1000) } diff --git a/core/src/test/scala/spark/CheckpointSuite.scala b/core/src/test/scala/spark/CheckpointSuite.scala index 33c317720c..0b74607fb8 100644 --- a/core/src/test/scala/spark/CheckpointSuite.scala +++ b/core/src/test/scala/spark/CheckpointSuite.scala @@ -99,7 +99,7 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { // the parent RDD has been checkpointed and parent splits have been changed to HadoopSplits. // Note that this test is very specific to the current implementation of CartesianRDD. val ones = sc.makeRDD(1 to 100, 10).map(x => x) - ones.checkpoint // checkpoint that MappedRDD + ones.checkpoint() // checkpoint that MappedRDD val cartesian = new CartesianRDD(sc, ones, ones) val splitBeforeCheckpoint = serializeDeserialize(cartesian.splits.head.asInstanceOf[CartesianSplit]) @@ -125,7 +125,7 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { // the parent RDD has been checkpointed and parent splits have been changed to HadoopSplits. // Note that this test is very specific to the current implementation of CoalescedRDDSplits val ones = sc.makeRDD(1 to 100, 10).map(x => x) - ones.checkpoint // checkpoint that MappedRDD + ones.checkpoint() // checkpoint that MappedRDD val coalesced = new CoalescedRDD(ones, 2) val splitBeforeCheckpoint = serializeDeserialize(coalesced.splits.head.asInstanceOf[CoalescedRDDSplit]) @@ -160,7 +160,6 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { // so only the RDD will reduce in serialized size, not the splits. 
testParentCheckpointing( rdd => new ZippedRDD(sc, rdd, rdd.map(x => x)), true, false) - } /** @@ -176,7 +175,7 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { testRDDSplitSize: Boolean = false ) { // Generate the final RDD using given RDD operation - val baseRDD = generateLongLineageRDD + val baseRDD = generateLongLineageRDD() val operatedRDD = op(baseRDD) val parentRDD = operatedRDD.dependencies.headOption.orNull val rddType = operatedRDD.getClass.getSimpleName @@ -245,12 +244,16 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { testRDDSplitSize: Boolean ) { // Generate the final RDD using given RDD operation - val baseRDD = generateLongLineageRDD + val baseRDD = generateLongLineageRDD() val operatedRDD = op(baseRDD) val parentRDD = operatedRDD.dependencies.head.rdd val rddType = operatedRDD.getClass.getSimpleName val parentRDDType = parentRDD.getClass.getSimpleName + // Get the splits and dependencies of the parent in case they're lazily computed + parentRDD.dependencies + parentRDD.splits + // Find serialized sizes before and after the checkpoint val (rddSizeBeforeCheckpoint, splitSizeBeforeCheckpoint) = getSerializedSizes(operatedRDD) parentRDD.checkpoint() // checkpoint the parent RDD, not the generated one @@ -267,7 +270,7 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { if (testRDDSize) { assert( rddSizeAfterCheckpoint < rddSizeBeforeCheckpoint, - "Size of " + rddType + " did not reduce after parent checkpointing parent " + parentRDDType + + "Size of " + rddType + " did not reduce after checkpointing parent " + parentRDDType + "[" + rddSizeBeforeCheckpoint + " --> " + rddSizeAfterCheckpoint + "]" ) } @@ -318,10 +321,12 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { } /** - * Get serialized sizes of the RDD and its splits + * Get serialized sizes of the RDD and its splits, in order to test whether the size shrinks + * upon checkpointing. Ignores the checkpointData field, which may grow when we checkpoint. */ def getSerializedSizes(rdd: RDD[_]): (Int, Int) = { - (Utils.serialize(rdd).size, Utils.serialize(rdd.splits).size) + (Utils.serialize(rdd).length - Utils.serialize(rdd.checkpointData).length, + Utils.serialize(rdd.splits).length) } /** From a34096a76de9d07518ce33111ad43b88049c1ac2 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Mon, 28 Jan 2013 22:40:16 -0800 Subject: [PATCH 228/291] Add easymock to POMs --- core/pom.xml | 5 +++++ pom.xml | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/core/pom.xml b/core/pom.xml index 862d3ec37a..a2b9b726a6 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -98,6 +98,11 @@ scalacheck_${scala.version} test
    + + org.easymock + easymock + test + com.novocode junit-interface diff --git a/pom.xml b/pom.xml index 3ea989a082..4a4ff560e7 100644 --- a/pom.xml +++ b/pom.xml @@ -273,6 +273,12 @@ 1.8 test + + org.easymock + easymock + 3.1 + test + org.scalacheck scalacheck_${scala.version} From 16a0789e10d2ac714e7c623b026c4a58ca9678d6 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Tue, 29 Jan 2013 17:09:53 -0800 Subject: [PATCH 229/291] Remember ConnectionManagerId used to initiate SendingConnections. This prevents ConnectionManager from getting confused if a machine has multiple host names and the one getHostName() finds happens not to be the one that was passed from, e.g., the BlockManagerMaster. --- .../src/main/scala/spark/network/Connection.scala | 15 +++++++++++---- .../scala/spark/network/ConnectionManager.scala | 3 ++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/spark/network/Connection.scala b/core/src/main/scala/spark/network/Connection.scala index c193bf7c8d..cd5b7d57f3 100644 --- a/core/src/main/scala/spark/network/Connection.scala +++ b/core/src/main/scala/spark/network/Connection.scala @@ -12,7 +12,14 @@ import java.net._ private[spark] -abstract class Connection(val channel: SocketChannel, val selector: Selector) extends Logging { +abstract class Connection(val channel: SocketChannel, val selector: Selector, + val remoteConnectionManagerId: ConnectionManagerId) extends Logging { + def this(channel_ : SocketChannel, selector_ : Selector) = { + this(channel_, selector_, + ConnectionManagerId.fromSocketAddress( + channel_.socket.getRemoteSocketAddress().asInstanceOf[InetSocketAddress] + )) + } channel.configureBlocking(false) channel.socket.setTcpNoDelay(true) @@ -25,7 +32,6 @@ abstract class Connection(val channel: SocketChannel, val selector: Selector) ex var onKeyInterestChangeCallback: (Connection, Int) => Unit = null val remoteAddress = getRemoteAddress() - val remoteConnectionManagerId = ConnectionManagerId.fromSocketAddress(remoteAddress) def key() = channel.keyFor(selector) @@ -103,8 +109,9 @@ abstract class Connection(val channel: SocketChannel, val selector: Selector) ex } -private[spark] class SendingConnection(val address: InetSocketAddress, selector_ : Selector) -extends Connection(SocketChannel.open, selector_) { +private[spark] class SendingConnection(val address: InetSocketAddress, selector_ : Selector, + remoteId_ : ConnectionManagerId) +extends Connection(SocketChannel.open, selector_, remoteId_) { class Outbox(fair: Int = 0) { val messages = new Queue[Message]() diff --git a/core/src/main/scala/spark/network/ConnectionManager.scala b/core/src/main/scala/spark/network/ConnectionManager.scala index 2ecd14f536..c7f226044d 100644 --- a/core/src/main/scala/spark/network/ConnectionManager.scala +++ b/core/src/main/scala/spark/network/ConnectionManager.scala @@ -299,7 +299,8 @@ private[spark] class ConnectionManager(port: Int) extends Logging { private def sendMessage(connectionManagerId: ConnectionManagerId, message: Message) { def startNewConnection(): SendingConnection = { val inetSocketAddress = new InetSocketAddress(connectionManagerId.host, connectionManagerId.port) - val newConnection = connectionRequests.getOrElseUpdate(connectionManagerId, new SendingConnection(inetSocketAddress, selector)) + val newConnection = connectionRequests.getOrElseUpdate(connectionManagerId, + new SendingConnection(inetSocketAddress, selector, connectionManagerId)) newConnection } val lookupKey = 
ConnectionManagerId.fromSocketAddress(connectionManagerId.toSocketAddress) From 0f81025ecadbfd21edb64602658ae8ba26e5bf66 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Tue, 29 Jan 2013 18:54:58 -0800 Subject: [PATCH 230/291] Add easymock to SBT configuration. --- project/SparkBuild.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 03b8094f7d..af8b5ba017 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -92,7 +92,8 @@ object SparkBuild extends Build { "org.eclipse.jetty" % "jetty-server" % "7.5.3.v20111011", "org.scalatest" %% "scalatest" % "1.8" % "test", "org.scalacheck" %% "scalacheck" % "1.9" % "test", - "com.novocode" % "junit-interface" % "0.8" % "test" + "com.novocode" % "junit-interface" % "0.8" % "test", + "org.easymock" % "easymock" % "3.1" % "test" ), parallelExecution := false, /* Workaround for issue #206 (fixed after SBT 0.11.0) */ From a3d14c0404d6b28433784f84086a29ecc0045a12 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Mon, 28 Jan 2013 22:41:08 -0800 Subject: [PATCH 231/291] Refactoring to DAGScheduler to aid testing --- core/src/main/scala/spark/SparkContext.scala | 1 + .../scala/spark/scheduler/DAGScheduler.scala | 29 +++++++++++-------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index dc9b8688b3..6ae04f4a44 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -187,6 +187,7 @@ class SparkContext( taskScheduler.start() private var dagScheduler = new DAGScheduler(taskScheduler) + dagScheduler.start() /** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. */ val hadoopConfiguration = { diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index b130be6a38..9655961162 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -23,7 +23,14 @@ import util.{MetadataCleaner, TimeStampedHashMap} * and to report fetch failures (the submitTasks method, and code to add CompletionEvents). */ private[spark] -class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with Logging { +class DAGScheduler(taskSched: TaskScheduler, + mapOutputTracker: MapOutputTracker, + blockManagerMaster: BlockManagerMaster, + env: SparkEnv) + extends TaskSchedulerListener with Logging { + def this(taskSched: TaskScheduler) { + this(taskSched, SparkEnv.get.mapOutputTracker, SparkEnv.get.blockManager.master, SparkEnv.get) + } taskSched.setListener(this) // Called by TaskScheduler to report task completions or failures. @@ -66,10 +73,6 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with var cacheLocs = new HashMap[Int, Array[List[String]]] - val env = SparkEnv.get - val mapOutputTracker = env.mapOutputTracker - val blockManagerMaster = env.blockManager.master - // For tracking failed nodes, we use the MapOutputTracker's generation number, which is // sent with every task. 
When we detect a node failing, we note the current generation number // and failed executor, increment it for new tasks, and use this to ignore stray ShuffleMapTask @@ -90,12 +93,14 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with val metadataCleaner = new MetadataCleaner("DAGScheduler", this.cleanup) // Start a thread to run the DAGScheduler event loop - new Thread("DAGScheduler") { - setDaemon(true) - override def run() { - DAGScheduler.this.run() - } - }.start() + def start() { + new Thread("DAGScheduler") { + setDaemon(true) + override def run() { + DAGScheduler.this.run() + } + }.start() + } def getCacheLocs(rdd: RDD[_]): Array[List[String]] = { if (!cacheLocs.contains(rdd.id)) { @@ -546,7 +551,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with if (!failedGeneration.contains(execId) || failedGeneration(execId) < currentGeneration) { failedGeneration(execId) = currentGeneration logInfo("Executor lost: %s (generation %d)".format(execId, currentGeneration)) - env.blockManager.master.removeExecutor(execId) + blockManagerMaster.removeExecutor(execId) // TODO: This will be really slow if we keep accumulating shuffle map stages for ((shuffleId, stage) <- shuffleToMapStage) { stage.removeOutputsOnExecutor(execId) From 9eac7d01f0880d1d3d51e922ef2566c4ee92989f Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Mon, 28 Jan 2013 22:42:35 -0800 Subject: [PATCH 232/291] Add DAGScheduler tests. --- .../spark/scheduler/DAGSchedulerSuite.scala | 540 ++++++++++++++++++ 1 file changed, 540 insertions(+) create mode 100644 core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala diff --git a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala new file mode 100644 index 0000000000..53f5214d7a --- /dev/null +++ b/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala @@ -0,0 +1,540 @@ +package spark.scheduler + +import scala.collection.mutable.{Map, HashMap} + +import org.scalatest.FunSuite +import org.scalatest.BeforeAndAfter +import org.scalatest.concurrent.AsyncAssertions +import org.scalatest.concurrent.TimeLimitedTests +import org.scalatest.mock.EasyMockSugar +import org.scalatest.time.{Span, Seconds} + +import org.easymock.EasyMock._ +import org.easymock.EasyMock +import org.easymock.{IAnswer, IArgumentMatcher} + +import akka.actor.ActorSystem + +import spark.storage.BlockManager +import spark.storage.BlockManagerId +import spark.storage.BlockManagerMaster +import spark.{Dependency, ShuffleDependency, OneToOneDependency} +import spark.FetchFailedException +import spark.MapOutputTracker +import spark.RDD +import spark.SparkContext +import spark.SparkException +import spark.Split +import spark.TaskContext +import spark.TaskEndReason + +import spark.{FetchFailed, Success} + +class DAGSchedulerSuite extends FunSuite + with BeforeAndAfter with EasyMockSugar with TimeLimitedTests + with AsyncAssertions with spark.Logging { + + // If we crash the DAGScheduler thread, our test will probably hang. 
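// Sketch, not part of the patch: the wiring that the constructor change above enables,
// distilled from the suite that follows. A test hands the DAGScheduler its collaborators
// directly (EasyMock mocks plus a real MapOutputTracker), production code keeps the old
// one-argument constructor that reads them from SparkEnv.get, and SparkContext now starts the
// event loop explicitly via start().
//
//   val taskSched = mock[TaskScheduler]
//   val bmMaster  = mock[BlockManagerMaster]
//   val tracker   = new MapOutputTracker(actorSystem, true)
//   val scheduler = new DAGScheduler(taskSched, tracker, bmMaster, null)  // null SparkEnv, as below
//   // production path: new DAGScheduler(taskSched) still pulls everything from SparkEnv.get
//   scheduler.start()   // the event-loop thread is no longer started by the constructor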
+ override val timeLimit = Span(5, Seconds) + + val sc: SparkContext = new SparkContext("local", "DAGSchedulerSuite") + var scheduler: DAGScheduler = null + var w: Waiter = null + val taskScheduler = mock[TaskScheduler] + val blockManagerMaster = mock[BlockManagerMaster] + var mapOutputTracker: MapOutputTracker = null + var schedulerThread: Thread = null + var schedulerException: Throwable = null + val taskSetMatchers = new HashMap[MyRDD, IArgumentMatcher] + val cacheLocations = new HashMap[(Int, Int), Seq[BlockManagerId]] + + implicit val mocks = MockObjects(taskScheduler, blockManagerMaster) + + def makeBlockManagerId(host: String): BlockManagerId = + BlockManagerId("exec-" + host, host, 12345) + + def resetExpecting(f: => Unit) { + reset(taskScheduler) + reset(blockManagerMaster) + expecting(f) + } + + before { + taskSetMatchers.clear() + cacheLocations.clear() + val actorSystem = ActorSystem("test") + mapOutputTracker = new MapOutputTracker(actorSystem, true) + resetExpecting { + taskScheduler.setListener(anyObject()) + } + whenExecuting { + scheduler = new DAGScheduler(taskScheduler, mapOutputTracker, blockManagerMaster, null) + } + w = new Waiter + schedulerException = null + schedulerThread = new Thread("DAGScheduler under test") { + override def run() { + try { + scheduler.run() + } catch { + case t: Throwable => + logError("Got exception in DAGScheduler: ", t) + schedulerException = t + } finally { + w.dismiss() + } + } + } + schedulerThread.start + logInfo("finished before") + } + + after { + logInfo("started after") + resetExpecting { + taskScheduler.stop() + } + whenExecuting { + scheduler.stop + schedulerThread.join + } + w.await() + if (schedulerException != null) { + throw new Exception("Exception caught from scheduler thread", schedulerException) + } + } + + // Type of RDD we use for testing. Note that we should never call the real RDD compute methods. + // This is a pair RDD type so it can always be used in ShuffleDependencies. + type MyRDD = RDD[(Int, Int)] + + def makeRdd( + numSplits: Int, + dependencies: List[Dependency[_]], + locations: Seq[Seq[String]] = Nil + ): MyRDD = { + val maxSplit = numSplits - 1 + return new MyRDD(sc, dependencies) { + override def compute(split: Split, context: TaskContext): Iterator[(Int, Int)] = + throw new RuntimeException("should not be reached") + override def getSplits() = (0 to maxSplit).map(i => new Split { + override def index = i + }).toArray + override def getPreferredLocations(split: Split): Seq[String] = + if (locations.isDefinedAt(split.index)) + locations(split.index) + else + Nil + override def toString: String = "DAGSchedulerSuiteRDD " + id + } + } + + def taskSetForRdd(rdd: MyRDD): TaskSet = { + val matcher = taskSetMatchers.getOrElseUpdate(rdd, + new IArgumentMatcher { + override def matches(actual: Any): Boolean = { + val taskSet = actual.asInstanceOf[TaskSet] + taskSet.tasks(0) match { + case rt: ResultTask[_, _] => rt.rdd.id == rdd.id + case smt: ShuffleMapTask => smt.rdd.id == rdd.id + case _ => false + } + } + override def appendTo(buf: StringBuffer) { + buf.append("taskSetForRdd(" + rdd + ")") + } + }) + EasyMock.reportMatcher(matcher) + return null + } + + def expectGetLocations(): Unit = { + EasyMock.expect(blockManagerMaster.getLocations(anyObject().asInstanceOf[Array[String]])). 
+ andAnswer(new IAnswer[Seq[Seq[BlockManagerId]]] { + override def answer(): Seq[Seq[BlockManagerId]] = { + val blocks = getCurrentArguments()(0).asInstanceOf[Array[String]] + return blocks.map { name => + val pieces = name.split("_") + if (pieces(0) == "rdd") { + val key = pieces(1).toInt -> pieces(2).toInt + if (cacheLocations.contains(key)) { + cacheLocations(key) + } else { + Seq[BlockManagerId]() + } + } else { + Seq[BlockManagerId]() + } + }.toSeq + } + }).anyTimes() + } + + def expectStageAnd(rdd: MyRDD, results: Seq[(TaskEndReason, Any)], + preferredLocations: Option[Seq[Seq[String]]] = None)(afterSubmit: TaskSet => Unit) { + // TODO: Remember which submission + EasyMock.expect(taskScheduler.submitTasks(taskSetForRdd(rdd))).andAnswer(new IAnswer[Unit] { + override def answer(): Unit = { + val taskSet = getCurrentArguments()(0).asInstanceOf[TaskSet] + for (task <- taskSet.tasks) { + task.generation = mapOutputTracker.getGeneration + } + afterSubmit(taskSet) + preferredLocations match { + case None => + for (taskLocs <- taskSet.tasks.map(_.preferredLocations)) { + w { assert(taskLocs.size === 0) } + } + case Some(locations) => + w { assert(locations.size === taskSet.tasks.size) } + for ((expectLocs, taskLocs) <- + taskSet.tasks.map(_.preferredLocations).zip(locations)) { + w { assert(expectLocs === taskLocs) } + } + } + w { assert(taskSet.tasks.size >= results.size)} + for ((result, i) <- results.zipWithIndex) { + if (i < taskSet.tasks.size) { + scheduler.taskEnded(taskSet.tasks(i), result._1, result._2, Map[Long, Any]()) + } + } + } + }) + } + + def expectStage(rdd: MyRDD, results: Seq[(TaskEndReason, Any)], + preferredLocations: Option[Seq[Seq[String]]] = None) { + expectStageAnd(rdd, results, preferredLocations) { _ => } + } + + def submitRdd(rdd: MyRDD, allowLocal: Boolean = false): Array[Int] = { + return scheduler.runJob[(Int, Int), Int]( + rdd, + (context: TaskContext, it: Iterator[(Int, Int)]) => it.next._1.asInstanceOf[Int], + (0 to (rdd.splits.size - 1)), + "test-site", + allowLocal + ) + } + + def makeMapStatus(host: String, reduces: Int): MapStatus = + new MapStatus(makeBlockManagerId(host), Array.fill[Byte](reduces)(2)) + + test("zero split job") { + val rdd = makeRdd(0, Nil) + resetExpecting { + expectGetLocations() + // deliberately expect no stages to be submitted + } + whenExecuting { + assert(submitRdd(rdd) === Array[Int]()) + } + } + + test("run trivial job") { + val rdd = makeRdd(1, Nil) + resetExpecting { + expectGetLocations() + expectStage(rdd, List( (Success, 42) )) + } + whenExecuting { + assert(submitRdd(rdd) === Array(42)) + } + } + + test("local job") { + val rdd = new MyRDD(sc, Nil) { + override def compute(split: Split, context: TaskContext): Iterator[(Int, Int)] = + Array(42 -> 0).iterator + override def getSplits() = Array( new Split { override def index = 0 } ) + override def getPreferredLocations(split: Split) = Nil + override def toString = "DAGSchedulerSuite Local RDD" + } + resetExpecting { + expectGetLocations() + // deliberately expect no stages to be submitted + } + whenExecuting { + assert(submitRdd(rdd, true) === Array(42)) + } + } + + test("run trivial job w/ dependency") { + val baseRdd = makeRdd(1, Nil) + val finalRdd = makeRdd(1, List(new OneToOneDependency(baseRdd))) + resetExpecting { + expectGetLocations() + expectStage(finalRdd, List( (Success, 42) )) + } + whenExecuting { + assert(submitRdd(finalRdd) === Array(42)) + } + } + + test("location preferences w/ dependency") { + val baseRdd = makeRdd(1, Nil) + val finalRdd = makeRdd(1, 
List(new OneToOneDependency(baseRdd))) + resetExpecting { + expectGetLocations() + cacheLocations(baseRdd.id -> 0) = + Seq(makeBlockManagerId("hostA"), makeBlockManagerId("hostB")) + expectStage(finalRdd, List( (Success, 42) ), + Some(List(Seq("hostA", "hostB")))) + } + whenExecuting { + assert(submitRdd(finalRdd) === Array(42)) + } + } + + test("trivial job failure") { + val rdd = makeRdd(1, Nil) + resetExpecting { + expectGetLocations() + expectStageAnd(rdd, List()) { taskSet => scheduler.taskSetFailed(taskSet, "test failure") } + } + whenExecuting(taskScheduler, blockManagerMaster) { + intercept[SparkException] { submitRdd(rdd) } + } + } + + test("run trivial shuffle") { + val shuffleMapRdd = makeRdd(2, Nil) + val shuffleDep = new ShuffleDependency(shuffleMapRdd, null) + val shuffleId = shuffleDep.shuffleId + val reduceRdd = makeRdd(1, List(shuffleDep)) + + resetExpecting { + expectGetLocations() + expectStage(shuffleMapRdd, List( + (Success, makeMapStatus("hostA", 1)), + (Success, makeMapStatus("hostB", 1)) + )) + expectStageAnd(reduceRdd, List( (Success, 42) )) { _ => + w { assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === + Array(makeBlockManagerId("hostA"), makeBlockManagerId("hostB"))) } + } + } + whenExecuting { + assert(submitRdd(reduceRdd) === Array(42)) + } + } + + test("run trivial shuffle with fetch failure") { + val shuffleMapRdd = makeRdd(2, Nil) + val shuffleDep = new ShuffleDependency(shuffleMapRdd, null) + val shuffleId = shuffleDep.shuffleId + val reduceRdd = makeRdd(2, List(shuffleDep)) + + resetExpecting { + expectGetLocations() + expectStage(shuffleMapRdd, List( + (Success, makeMapStatus("hostA", 1)), + (Success, makeMapStatus("hostB", 1)) + )) + blockManagerMaster.removeExecutor("exec-hostA") + expectStage(reduceRdd, List( + (Success, 42), + (FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0, 0), null) + )) + // partial recompute + expectStage(shuffleMapRdd, List( (Success, makeMapStatus("hostA", 1)) )) + expectStageAnd(reduceRdd, List( (Success, 43) )) { _ => + w { assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === + Array(makeBlockManagerId("hostA"), + makeBlockManagerId("hostB"))) } + } + } + whenExecuting { + assert(submitRdd(reduceRdd) === Array(42, 43)) + } + } + + test("ignore late map task completions") { + val shuffleMapRdd = makeRdd(2, Nil) + val shuffleDep = new ShuffleDependency(shuffleMapRdd, null) + val shuffleId = shuffleDep.shuffleId + val reduceRdd = makeRdd(2, List(shuffleDep)) + + resetExpecting { + expectGetLocations() + expectStageAnd(shuffleMapRdd, List( + (Success, makeMapStatus("hostA", 1)) + )) { taskSet => + val newGeneration = mapOutputTracker.getGeneration + 1 + scheduler.executorLost("exec-hostA") + val noAccum = Map[Long, Any]() + // We rely on the event queue being ordered and increasing the generation number by 1 + // should be ignored for being too old + scheduler.taskEnded(taskSet.tasks(0), Success, makeMapStatus("hostA", 1), noAccum) + // should work because it's a non-failed host + scheduler.taskEnded(taskSet.tasks(0), Success, makeMapStatus("hostB", 1), noAccum) + // should be ignored for being too old + scheduler.taskEnded(taskSet.tasks(0), Success, makeMapStatus("hostA", 1), noAccum) + // should be ignored (not end the stage) because it's too old + scheduler.taskEnded(taskSet.tasks(1), Success, makeMapStatus("hostA", 1), noAccum) + taskSet.tasks(1).generation = newGeneration + scheduler.taskEnded(taskSet.tasks(1), Success, makeMapStatus("hostA", 1), noAccum) + } + 
blockManagerMaster.removeExecutor("exec-hostA") + expectStageAnd(reduceRdd, List( + (Success, 42), (Success, 43) + )) { _ => + w { assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === + Array(makeBlockManagerId("hostB"), makeBlockManagerId("hostA"))) } + } + } + whenExecuting { + assert(submitRdd(reduceRdd) === Array(42, 43)) + } + } + + test("run trivial shuffle with out-of-band failure") { + val shuffleMapRdd = makeRdd(2, Nil) + val shuffleDep = new ShuffleDependency(shuffleMapRdd, null) + val shuffleId = shuffleDep.shuffleId + val reduceRdd = makeRdd(1, List(shuffleDep)) + resetExpecting { + expectGetLocations() + blockManagerMaster.removeExecutor("exec-hostA") + expectStageAnd(shuffleMapRdd, List( + (Success, makeMapStatus("hostA", 1)), + (Success, makeMapStatus("hostB", 1)) + )) { _ => scheduler.executorLost("exec-hostA") } + expectStage(shuffleMapRdd, List( + (Success, makeMapStatus("hostC", 1)) + )) + expectStageAnd(reduceRdd, List( (Success, 42) )) { _ => + w { assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === + Array(makeBlockManagerId("hostC"), + makeBlockManagerId("hostB"))) } + } + } + whenExecuting { + assert(submitRdd(reduceRdd) === Array(42)) + } + } + + test("recursive shuffle failures") { + val shuffleOneRdd = makeRdd(2, Nil) + val shuffleDepOne = new ShuffleDependency(shuffleOneRdd, null) + val shuffleTwoRdd = makeRdd(2, List(shuffleDepOne)) + val shuffleDepTwo = new ShuffleDependency(shuffleTwoRdd, null) + val finalRdd = makeRdd(1, List(shuffleDepTwo)) + + resetExpecting { + expectGetLocations() + expectStage(shuffleOneRdd, List( + (Success, makeMapStatus("hostA", 1)), + (Success, makeMapStatus("hostB", 1)) + )) + expectStage(shuffleTwoRdd, List( + (Success, makeMapStatus("hostA", 1)), + (Success, makeMapStatus("hostC", 1)) + )) + blockManagerMaster.removeExecutor("exec-hostA") + expectStage(finalRdd, List( + (FetchFailed(makeBlockManagerId("hostA"), shuffleDepTwo.shuffleId, 0, 0), null) + )) + // triggers a partial recompute of the first stage, then the second + expectStage(shuffleOneRdd, List( + (Success, makeMapStatus("hostA", 1)) + )) + expectStage(shuffleTwoRdd, List( + (Success, makeMapStatus("hostA", 1)) + )) + expectStage(finalRdd, List( + (Success, 42) + )) + } + whenExecuting { + assert(submitRdd(finalRdd) === Array(42)) + } + } + + test("cached post-shuffle") { + val shuffleOneRdd = makeRdd(2, Nil) + val shuffleDepOne = new ShuffleDependency(shuffleOneRdd, null) + val shuffleTwoRdd = makeRdd(2, List(shuffleDepOne)) + val shuffleDepTwo = new ShuffleDependency(shuffleTwoRdd, null) + val finalRdd = makeRdd(1, List(shuffleDepTwo)) + + resetExpecting { + expectGetLocations() + expectStage(shuffleOneRdd, List( + (Success, makeMapStatus("hostA", 1)), + (Success, makeMapStatus("hostB", 1)) + )) + expectStageAnd(shuffleTwoRdd, List( + (Success, makeMapStatus("hostA", 1)), + (Success, makeMapStatus("hostC", 1)) + )){ _ => + cacheLocations(shuffleTwoRdd.id -> 0) = Seq(makeBlockManagerId("hostD")) + cacheLocations(shuffleTwoRdd.id -> 1) = Seq(makeBlockManagerId("hostC")) + } + blockManagerMaster.removeExecutor("exec-hostA") + expectStage(finalRdd, List( + (FetchFailed(makeBlockManagerId("hostA"), shuffleDepTwo.shuffleId, 0, 0), null) + )) + // since we have a cached copy of the missing split of shuffleTwoRdd, we shouldn't + // immediately try to rerun shuffleOneRdd: + expectStage(shuffleTwoRdd, List( + (Success, makeMapStatus("hostD", 1)) + ), Some(Seq(List("hostD")))) + expectStage(finalRdd, List( + (Success, 42) + )) + } + 
whenExecuting { + assert(submitRdd(finalRdd) === Array(42)) + } + } + + test("cached post-shuffle but fails") { + val shuffleOneRdd = makeRdd(2, Nil) + val shuffleDepOne = new ShuffleDependency(shuffleOneRdd, null) + val shuffleTwoRdd = makeRdd(2, List(shuffleDepOne)) + val shuffleDepTwo = new ShuffleDependency(shuffleTwoRdd, null) + val finalRdd = makeRdd(1, List(shuffleDepTwo)) + + resetExpecting { + expectGetLocations() + expectStage(shuffleOneRdd, List( + (Success, makeMapStatus("hostA", 1)), + (Success, makeMapStatus("hostB", 1)) + )) + expectStageAnd(shuffleTwoRdd, List( + (Success, makeMapStatus("hostA", 1)), + (Success, makeMapStatus("hostC", 1)) + )){ _ => + cacheLocations(shuffleTwoRdd.id -> 0) = Seq(makeBlockManagerId("hostD")) + cacheLocations(shuffleTwoRdd.id -> 1) = Seq(makeBlockManagerId("hostC")) + } + blockManagerMaster.removeExecutor("exec-hostA") + expectStage(finalRdd, List( + (FetchFailed(makeBlockManagerId("hostA"), shuffleDepTwo.shuffleId, 0, 0), null) + )) + // since we have a cached copy of the missing split of shuffleTwoRdd, we shouldn't + // immediately try to rerun shuffleOneRdd: + expectStageAnd(shuffleTwoRdd, List( + (FetchFailed(null, shuffleDepOne.shuffleId, 0, 0), null) + ), Some(Seq(List("hostD")))) { _ => + w { + intercept[FetchFailedException]{ + mapOutputTracker.getServerStatuses(shuffleDepOne.shuffleId, 0) + } + } + cacheLocations.remove(shuffleTwoRdd.id -> 0) + } + // after that fetch failure, we should refetch the cache locations and try to recompute + // the whole chain. Note that we will ignore that a fetch failure previously occured on + // this host. + expectStage(shuffleOneRdd, List( (Success, makeMapStatus("hostA", 1)) )) + expectStage(shuffleTwoRdd, List( (Success, makeMapStatus("hostA", 1)) )) + expectStage(finalRdd, List( (Success, 42) )) + } + whenExecuting { + assert(submitRdd(finalRdd) === Array(42)) + } + } +} + From 4bf3d7ea1252454ca584a3dabf26bdeab4069409 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Tue, 29 Jan 2013 19:05:45 -0800 Subject: [PATCH 233/291] Clear spark.master.port to cleanup for other tests --- core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala index 53f5214d7a..6c577c2685 100644 --- a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala @@ -102,6 +102,7 @@ class DAGSchedulerSuite extends FunSuite if (schedulerException != null) { throw new Exception("Exception caught from scheduler thread", schedulerException) } + System.clearProperty("spark.master.port") } // Type of RDD we use for testing. Note that we should never call the real RDD compute methods. From 178b89204c9dbee36886e757ddaafbd079672f4a Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Wed, 30 Jan 2013 09:19:55 -0800 Subject: [PATCH 234/291] Refactor DAGScheduler more to allow testing without a separate thread. 
--- .../scala/spark/scheduler/DAGScheduler.scala | 176 +++++++++++------- 1 file changed, 111 insertions(+), 65 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index 9655961162..6892509ed1 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -23,11 +23,13 @@ import util.{MetadataCleaner, TimeStampedHashMap} * and to report fetch failures (the submitTasks method, and code to add CompletionEvents). */ private[spark] -class DAGScheduler(taskSched: TaskScheduler, - mapOutputTracker: MapOutputTracker, - blockManagerMaster: BlockManagerMaster, - env: SparkEnv) - extends TaskSchedulerListener with Logging { +class DAGScheduler( + taskSched: TaskScheduler, + mapOutputTracker: MapOutputTracker, + blockManagerMaster: BlockManagerMaster, + env: SparkEnv) + extends TaskSchedulerListener with Logging { + def this(taskSched: TaskScheduler) { this(taskSched, SparkEnv.get.mapOutputTracker, SparkEnv.get.blockManager.master, SparkEnv.get) } @@ -203,6 +205,27 @@ class DAGScheduler(taskSched: TaskScheduler, missing.toList } + /** Returns (and does not) submit a JobSubmitted event suitable to run a given job, and + * a JobWaiter whose getResult() method will return the result of the job when it is complete. + * + * The job is assumed to have at least one partition; zero partition jobs should be handled + * without a JobSubmitted event. + */ + private[scheduler] def prepareJob[T, U: ClassManifest]( + finalRdd: RDD[T], + func: (TaskContext, Iterator[T]) => U, + partitions: Seq[Int], + callSite: String, + allowLocal: Boolean) + : (JobSubmitted, JobWaiter) = + { + assert(partitions.size > 0) + val waiter = new JobWaiter(partitions.size) + val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] + val toSubmit = JobSubmitted(finalRdd, func2, partitions.toArray, allowLocal, callSite, waiter) + return (toSubmit, waiter) + } + def runJob[T, U: ClassManifest]( finalRdd: RDD[T], func: (TaskContext, Iterator[T]) => U, @@ -214,9 +237,8 @@ class DAGScheduler(taskSched: TaskScheduler, if (partitions.size == 0) { return new Array[U](0) } - val waiter = new JobWaiter(partitions.size) - val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] - eventQueue.put(JobSubmitted(finalRdd, func2, partitions.toArray, allowLocal, callSite, waiter)) + val (toSubmit, waiter) = prepareJob(finalRdd, func, partitions, callSite, allowLocal) + eventQueue.put(toSubmit) waiter.getResult() match { case JobSucceeded(results: Seq[_]) => return results.asInstanceOf[Seq[U]].toArray @@ -241,6 +263,81 @@ class DAGScheduler(taskSched: TaskScheduler, return listener.getResult() // Will throw an exception if the job fails } + /** Process one event retrieved from the event queue. + * Returns true if we should stop the event loop. 
+ */ + private[scheduler] def processEvent(event: DAGSchedulerEvent): Boolean = { + event match { + case JobSubmitted(finalRDD, func, partitions, allowLocal, callSite, listener) => + val runId = nextRunId.getAndIncrement() + val finalStage = newStage(finalRDD, None, runId) + val job = new ActiveJob(runId, finalStage, func, partitions, callSite, listener) + clearCacheLocs() + logInfo("Got job " + job.runId + " (" + callSite + ") with " + partitions.length + + " output partitions (allowLocal=" + allowLocal + ")") + logInfo("Final stage: " + finalStage + " (" + finalStage.origin + ")") + logInfo("Parents of final stage: " + finalStage.parents) + logInfo("Missing parents: " + getMissingParentStages(finalStage)) + if (allowLocal && finalStage.parents.size == 0 && partitions.length == 1) { + // Compute very short actions like first() or take() with no parent stages locally. + runLocally(job) + } else { + activeJobs += job + resultStageToJob(finalStage) = job + submitStage(finalStage) + } + + case ExecutorLost(execId) => + handleExecutorLost(execId) + + case completion: CompletionEvent => + handleTaskCompletion(completion) + + case TaskSetFailed(taskSet, reason) => + abortStage(idToStage(taskSet.stageId), reason) + + case StopDAGScheduler => + // Cancel any active jobs + for (job <- activeJobs) { + val error = new SparkException("Job cancelled because SparkContext was shut down") + job.listener.jobFailed(error) + } + return true + } + return false + } + + /** Resubmit any failed stages. Ordinarily called after a small amount of time has passed since + * the last fetch failure. + */ + private[scheduler] def resubmitFailedStages() { + logInfo("Resubmitting failed stages") + clearCacheLocs() + val failed2 = failed.toArray + failed.clear() + for (stage <- failed2.sortBy(_.priority)) { + submitStage(stage) + } + } + + /** Check for waiting or failed stages which are now eligible for resubmission. + * Ordinarily run on every iteration of the event loop. + */ + private[scheduler] def submitWaitingStages() { + // TODO: We might want to run this less often, when we are sure that something has become + // runnable that wasn't before. + logTrace("Checking for newly runnable parent stages") + logTrace("running: " + running) + logTrace("waiting: " + waiting) + logTrace("failed: " + failed) + val waiting2 = waiting.toArray + waiting.clear() + for (stage <- waiting2.sortBy(_.priority)) { + submitStage(stage) + } + } + + /** * The main event loop of the DAG scheduler, which waits for new-job / task-finished / failure * events and responds by launching tasks. 
This runs in a dedicated thread and receives events @@ -251,77 +348,26 @@ class DAGScheduler(taskSched: TaskScheduler, while (true) { val event = eventQueue.poll(POLL_TIMEOUT, TimeUnit.MILLISECONDS) - val time = System.currentTimeMillis() // TODO: use a pluggable clock for testability if (event != null) { logDebug("Got event of type " + event.getClass.getName) } - event match { - case JobSubmitted(finalRDD, func, partitions, allowLocal, callSite, listener) => - val runId = nextRunId.getAndIncrement() - val finalStage = newStage(finalRDD, None, runId) - val job = new ActiveJob(runId, finalStage, func, partitions, callSite, listener) - clearCacheLocs() - logInfo("Got job " + job.runId + " (" + callSite + ") with " + partitions.length + - " output partitions") - logInfo("Final stage: " + finalStage + " (" + finalStage.origin + ")") - logInfo("Parents of final stage: " + finalStage.parents) - logInfo("Missing parents: " + getMissingParentStages(finalStage)) - if (allowLocal && finalStage.parents.size == 0 && partitions.length == 1) { - // Compute very short actions like first() or take() with no parent stages locally. - runLocally(job) - } else { - activeJobs += job - resultStageToJob(finalStage) = job - submitStage(finalStage) - } - - case ExecutorLost(execId) => - handleExecutorLost(execId) - - case completion: CompletionEvent => - handleTaskCompletion(completion) - - case TaskSetFailed(taskSet, reason) => - abortStage(idToStage(taskSet.stageId), reason) - - case StopDAGScheduler => - // Cancel any active jobs - for (job <- activeJobs) { - val error = new SparkException("Job cancelled because SparkContext was shut down") - job.listener.jobFailed(error) - } + if (event != null) { + if (processEvent(event)) { return - - case null => - // queue.poll() timed out, ignore it + } } + val time = System.currentTimeMillis() // TODO: use a pluggable clock for testability // Periodically resubmit failed stages if some map output fetches have failed and we have // waited at least RESUBMIT_TIMEOUT. We wait for this short time because when a node fails, // tasks on many other nodes are bound to get a fetch failure, and they won't all get it at // the same time, so we want to make sure we've identified all the reduce tasks that depend // on the failed node. if (failed.size > 0 && time > lastFetchFailureTime + RESUBMIT_TIMEOUT) { - logInfo("Resubmitting failed stages") - clearCacheLocs() - val failed2 = failed.toArray - failed.clear() - for (stage <- failed2.sortBy(_.priority)) { - submitStage(stage) - } + resubmitFailedStages } else { - // TODO: We might want to run this less often, when we are sure that something has become - // runnable that wasn't before. - logTrace("Checking for newly runnable parent stages") - logTrace("running: " + running) - logTrace("waiting: " + waiting) - logTrace("failed: " + failed) - val waiting2 = waiting.toArray - waiting.clear() - for (stage <- waiting2.sortBy(_.priority)) { - submitStage(stage) - } + submitWaitingStages } } } From 9c0bae75ade9e5b5a69077a5719adf4ee96e2c2e Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Wed, 30 Jan 2013 09:22:07 -0800 Subject: [PATCH 235/291] Change DAGSchedulerSuite to run DAGScheduler in the same Thread. 
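With processEvent, resubmitFailedStages, and submitWaitingStages now visible to the scheduler package, a test can feed events to a DAGScheduler directly instead of racing a background event-loop thread. A minimal sketch of that pattern, assuming it compiles inside package spark.scheduler (the object name is illustrative; the suite below wraps the same calls in its runEvent helper):

    package spark.scheduler

    // Drive the scheduler one event at a time, with no background thread.
    object SynchronousDriverSketch {
      def runEvent(scheduler: DAGScheduler, event: DAGSchedulerEvent) {
        // processEvent returns true only when it sees StopDAGScheduler
        assert(!scheduler.processEvent(event))
        // do the follow-up work the real event loop performs on each iteration
        scheduler.submitWaitingStages()
      }
    }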
--- .../spark/scheduler/DAGSchedulerSuite.scala | 582 ++++++++++-------- 1 file changed, 326 insertions(+), 256 deletions(-) diff --git a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala index 6c577c2685..89173540d4 100644 --- a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala @@ -4,12 +4,12 @@ import scala.collection.mutable.{Map, HashMap} import org.scalatest.FunSuite import org.scalatest.BeforeAndAfter -import org.scalatest.concurrent.AsyncAssertions import org.scalatest.concurrent.TimeLimitedTests import org.scalatest.mock.EasyMockSugar import org.scalatest.time.{Span, Seconds} import org.easymock.EasyMock._ +import org.easymock.Capture import org.easymock.EasyMock import org.easymock.{IAnswer, IArgumentMatcher} @@ -30,33 +30,55 @@ import spark.TaskEndReason import spark.{FetchFailed, Success} -class DAGSchedulerSuite extends FunSuite - with BeforeAndAfter with EasyMockSugar with TimeLimitedTests - with AsyncAssertions with spark.Logging { +class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar with TimeLimitedTests { - // If we crash the DAGScheduler thread, our test will probably hang. + // impose a time limit on this test in case we don't let the job finish. override val timeLimit = Span(5, Seconds) val sc: SparkContext = new SparkContext("local", "DAGSchedulerSuite") var scheduler: DAGScheduler = null - var w: Waiter = null val taskScheduler = mock[TaskScheduler] val blockManagerMaster = mock[BlockManagerMaster] var mapOutputTracker: MapOutputTracker = null var schedulerThread: Thread = null var schedulerException: Throwable = null + + /** Set of EasyMock argument matchers that match a TaskSet for a given RDD. + * We cache these so we do not create duplicate matchers for the same RDD. + * This allows us to easily setup a sequence of expectations for task sets for + * that RDD. + */ val taskSetMatchers = new HashMap[MyRDD, IArgumentMatcher] + + /** Set of cache locations to return from our mock BlockManagerMaster. + * Keys are (rdd ID, partition ID). Anything not present will return an empty + * list of cache locations silently. + */ val cacheLocations = new HashMap[(Int, Int), Seq[BlockManagerId]] + /** JobWaiter for the last JobSubmitted event we pushed. To keep tests (most of which + * will only submit one job) from needing to explicitly track it. + */ + var lastJobWaiter: JobWaiter = null + + /** Tell EasyMockSugar what mock objects we want to be configured by expecting {...} + * and whenExecuting {...} */ implicit val mocks = MockObjects(taskScheduler, blockManagerMaster) - def makeBlockManagerId(host: String): BlockManagerId = - BlockManagerId("exec-" + host, host, 12345) - + /** Utility function to reset mocks and set expectations on them. EasyMock wants mock objects + * to be reset after each time their expectations are set, and we tend to check mock object + * calls over a single call to DAGScheduler. + * + * We also set a default expectation here that blockManagerMaster.getLocations can be called + * and will return values from cacheLocations. 
+ */ def resetExpecting(f: => Unit) { reset(taskScheduler) reset(blockManagerMaster) - expecting(f) + expecting { + expectGetLocations() + f + } } before { @@ -70,45 +92,30 @@ class DAGSchedulerSuite extends FunSuite whenExecuting { scheduler = new DAGScheduler(taskScheduler, mapOutputTracker, blockManagerMaster, null) } - w = new Waiter - schedulerException = null - schedulerThread = new Thread("DAGScheduler under test") { - override def run() { - try { - scheduler.run() - } catch { - case t: Throwable => - logError("Got exception in DAGScheduler: ", t) - schedulerException = t - } finally { - w.dismiss() - } - } - } - schedulerThread.start - logInfo("finished before") } after { - logInfo("started after") + assert(scheduler.processEvent(StopDAGScheduler)) resetExpecting { taskScheduler.stop() } whenExecuting { - scheduler.stop - schedulerThread.join - } - w.await() - if (schedulerException != null) { - throw new Exception("Exception caught from scheduler thread", schedulerException) + scheduler.stop() } System.clearProperty("spark.master.port") } - // Type of RDD we use for testing. Note that we should never call the real RDD compute methods. - // This is a pair RDD type so it can always be used in ShuffleDependencies. + def makeBlockManagerId(host: String): BlockManagerId = + BlockManagerId("exec-" + host, host, 12345) + + /** Type of RDD we use for testing. Note that we should never call the real RDD compute methods. + * This is a pair RDD type so it can always be used in ShuffleDependencies. */ type MyRDD = RDD[(Int, Int)] + /** Create an RDD for passing to DAGScheduler. These RDDs will use the dependencies and + * preferredLocations (if any) that are passed to them. They are deliberately not executable + * so we can test that DAGScheduler does not try to execute RDDs locally. + */ def makeRdd( numSplits: Int, dependencies: List[Dependency[_]], @@ -130,6 +137,9 @@ class DAGSchedulerSuite extends FunSuite } } + /** EasyMock matcher method. For use as an argument matcher for a TaskSet whose first task + * is from a particular RDD. + */ def taskSetForRdd(rdd: MyRDD): TaskSet = { val matcher = taskSetMatchers.getOrElseUpdate(rdd, new IArgumentMatcher { @@ -149,6 +159,9 @@ class DAGSchedulerSuite extends FunSuite return null } + /** Setup an EasyMock expectation to repsond to blockManagerMaster.getLocations() called from + * cacheLocations. + */ def expectGetLocations(): Unit = { EasyMock.expect(blockManagerMaster.getLocations(anyObject().asInstanceOf[Array[String]])). 
andAnswer(new IAnswer[Seq[Seq[BlockManagerId]]] { @@ -171,51 +184,106 @@ class DAGSchedulerSuite extends FunSuite }).anyTimes() } - def expectStageAnd(rdd: MyRDD, results: Seq[(TaskEndReason, Any)], - preferredLocations: Option[Seq[Seq[String]]] = None)(afterSubmit: TaskSet => Unit) { - // TODO: Remember which submission - EasyMock.expect(taskScheduler.submitTasks(taskSetForRdd(rdd))).andAnswer(new IAnswer[Unit] { - override def answer(): Unit = { - val taskSet = getCurrentArguments()(0).asInstanceOf[TaskSet] - for (task <- taskSet.tasks) { - task.generation = mapOutputTracker.getGeneration - } - afterSubmit(taskSet) - preferredLocations match { - case None => - for (taskLocs <- taskSet.tasks.map(_.preferredLocations)) { - w { assert(taskLocs.size === 0) } - } - case Some(locations) => - w { assert(locations.size === taskSet.tasks.size) } - for ((expectLocs, taskLocs) <- - taskSet.tasks.map(_.preferredLocations).zip(locations)) { - w { assert(expectLocs === taskLocs) } - } - } - w { assert(taskSet.tasks.size >= results.size)} - for ((result, i) <- results.zipWithIndex) { - if (i < taskSet.tasks.size) { - scheduler.taskEnded(taskSet.tasks(i), result._1, result._2, Map[Long, Any]()) - } - } + /** Process the supplied event as if it were the top of the DAGScheduler event queue, expecting + * the scheduler not to exit. + * + * After processing the event, submit waiting stages as is done on most iterations of the + * DAGScheduler event loop. + */ + def runEvent(event: DAGSchedulerEvent) { + assert(!scheduler.processEvent(event)) + scheduler.submitWaitingStages() + } + + /** Expect a TaskSet for the specified RDD to be submitted to the TaskScheduler. Should be + * called from a resetExpecting { ... } block. + * + * Returns a easymock Capture that will contain the task set after the stage is submitted. + * Most tests should use interceptStage() instead of this directly. + */ + def expectStage(rdd: MyRDD): Capture[TaskSet] = { + val taskSetCapture = new Capture[TaskSet] + taskScheduler.submitTasks(and(capture(taskSetCapture), taskSetForRdd(rdd))) + return taskSetCapture + } + + /** Expect the supplied code snippet to submit a stage for the specified RDD. + * Return the resulting TaskSet. First marks all the tasks are belonging to the + * current MapOutputTracker generation. + */ + def interceptStage(rdd: MyRDD)(f: => Unit): TaskSet = { + var capture: Capture[TaskSet] = null + resetExpecting { + capture = expectStage(rdd) + } + whenExecuting { + f + } + val taskSet = capture.getValue + for (task <- taskSet.tasks) { + task.generation = mapOutputTracker.getGeneration + } + return taskSet + } + + /** Send the given CompletionEvent messages for the tasks in the TaskSet. */ + def respondToTaskSet(taskSet: TaskSet, results: Seq[(TaskEndReason, Any)]) { + assert(taskSet.tasks.size >= results.size) + for ((result, i) <- results.zipWithIndex) { + if (i < taskSet.tasks.size) { + runEvent(CompletionEvent(taskSet.tasks(i), result._1, result._2, Map[Long, Any]())) } - }) + } } - def expectStage(rdd: MyRDD, results: Seq[(TaskEndReason, Any)], - preferredLocations: Option[Seq[Seq[String]]] = None) { - expectStageAnd(rdd, results, preferredLocations) { _ => } + /** Assert that the supplied TaskSet has exactly the given preferredLocations. 
*/ + def expectTaskSetLocations(taskSet: TaskSet, locations: Seq[Seq[String]]) { + assert(locations.size === taskSet.tasks.size) + for ((expectLocs, taskLocs) <- + taskSet.tasks.map(_.preferredLocations).zip(locations)) { + assert(expectLocs === taskLocs) + } } - def submitRdd(rdd: MyRDD, allowLocal: Boolean = false): Array[Int] = { - return scheduler.runJob[(Int, Int), Int]( + /** When we submit dummy Jobs, this is the compute function we supply. Except in a local test + * below, we do not expect this function to ever be executed; instead, we will return results + * directly through CompletionEvents. + */ + def jobComputeFunc(context: TaskContext, it: Iterator[(Int, Int)]): Int = + it.next._1.asInstanceOf[Int] + + + /** Start a job to compute the given RDD. Returns the JobWaiter that will + * collect the result of the job via callbacks from DAGScheduler. */ + def submitRdd(rdd: MyRDD, allowLocal: Boolean = false): JobWaiter = { + val (toSubmit, waiter) = scheduler.prepareJob[(Int, Int), Int]( rdd, - (context: TaskContext, it: Iterator[(Int, Int)]) => it.next._1.asInstanceOf[Int], + jobComputeFunc, (0 to (rdd.splits.size - 1)), "test-site", allowLocal ) + lastJobWaiter = waiter + runEvent(toSubmit) + return waiter + } + + /** Assert that a job we started has failed. */ + def expectJobException(waiter: JobWaiter = lastJobWaiter) { + waiter.getResult match { + case JobSucceeded(_) => fail() + case JobFailed(_) => return + } + } + + /** Assert that a job we started has succeeded and has the given result. */ + def expectJobResult(expected: Array[Int], waiter: JobWaiter = lastJobWaiter) { + waiter.getResult match { + case JobSucceeded(answer) => + assert(expected === answer.asInstanceOf[Seq[Int]].toArray ) + case JobFailed(_) => + fail() + } } def makeMapStatus(host: String, reduces: Int): MapStatus = @@ -223,24 +291,14 @@ class DAGSchedulerSuite extends FunSuite test("zero split job") { val rdd = makeRdd(0, Nil) - resetExpecting { - expectGetLocations() - // deliberately expect no stages to be submitted - } - whenExecuting { - assert(submitRdd(rdd) === Array[Int]()) - } + assert(scheduler.runJob(rdd, jobComputeFunc, Seq(), "test-site", false) === Array[Int]()) } test("run trivial job") { val rdd = makeRdd(1, Nil) - resetExpecting { - expectGetLocations() - expectStage(rdd, List( (Success, 42) )) - } - whenExecuting { - assert(submitRdd(rdd) === Array(42)) - } + val taskSet = interceptStage(rdd) { submitRdd(rdd) } + respondToTaskSet(taskSet, List( (Success, 42) )) + expectJobResult(Array(42)) } test("local job") { @@ -251,51 +309,34 @@ class DAGSchedulerSuite extends FunSuite override def getPreferredLocations(split: Split) = Nil override def toString = "DAGSchedulerSuite Local RDD" } - resetExpecting { - expectGetLocations() - // deliberately expect no stages to be submitted - } - whenExecuting { - assert(submitRdd(rdd, true) === Array(42)) - } + submitRdd(rdd, true) + expectJobResult(Array(42)) } test("run trivial job w/ dependency") { val baseRdd = makeRdd(1, Nil) val finalRdd = makeRdd(1, List(new OneToOneDependency(baseRdd))) - resetExpecting { - expectGetLocations() - expectStage(finalRdd, List( (Success, 42) )) - } - whenExecuting { - assert(submitRdd(finalRdd) === Array(42)) - } + val taskSet = interceptStage(finalRdd) { submitRdd(finalRdd) } + respondToTaskSet(taskSet, List( (Success, 42) )) + expectJobResult(Array(42)) } - test("location preferences w/ dependency") { + test("cache location preferences w/ dependency") { val baseRdd = makeRdd(1, Nil) val finalRdd = makeRdd(1, List(new 
OneToOneDependency(baseRdd))) - resetExpecting { - expectGetLocations() - cacheLocations(baseRdd.id -> 0) = - Seq(makeBlockManagerId("hostA"), makeBlockManagerId("hostB")) - expectStage(finalRdd, List( (Success, 42) ), - Some(List(Seq("hostA", "hostB")))) - } - whenExecuting { - assert(submitRdd(finalRdd) === Array(42)) - } + cacheLocations(baseRdd.id -> 0) = + Seq(makeBlockManagerId("hostA"), makeBlockManagerId("hostB")) + val taskSet = interceptStage(finalRdd) { submitRdd(finalRdd) } + expectTaskSetLocations(taskSet, List(Seq("hostA", "hostB"))) + respondToTaskSet(taskSet, List( (Success, 42) )) + expectJobResult(Array(42)) } test("trivial job failure") { val rdd = makeRdd(1, Nil) - resetExpecting { - expectGetLocations() - expectStageAnd(rdd, List()) { taskSet => scheduler.taskSetFailed(taskSet, "test failure") } - } - whenExecuting(taskScheduler, blockManagerMaster) { - intercept[SparkException] { submitRdd(rdd) } - } + val taskSet = interceptStage(rdd) { submitRdd(rdd) } + runEvent(TaskSetFailed(taskSet, "test failure")) + expectJobException() } test("run trivial shuffle") { @@ -304,20 +345,17 @@ class DAGSchedulerSuite extends FunSuite val shuffleId = shuffleDep.shuffleId val reduceRdd = makeRdd(1, List(shuffleDep)) - resetExpecting { - expectGetLocations() - expectStage(shuffleMapRdd, List( + val firstStage = interceptStage(shuffleMapRdd) { submitRdd(reduceRdd) } + val secondStage = interceptStage(reduceRdd) { + respondToTaskSet(firstStage, List( (Success, makeMapStatus("hostA", 1)), (Success, makeMapStatus("hostB", 1)) )) - expectStageAnd(reduceRdd, List( (Success, 42) )) { _ => - w { assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === - Array(makeBlockManagerId("hostA"), makeBlockManagerId("hostB"))) } - } - } - whenExecuting { - assert(submitRdd(reduceRdd) === Array(42)) } + assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === + Array(makeBlockManagerId("hostA"), makeBlockManagerId("hostB"))) + respondToTaskSet(secondStage, List( (Success, 42) )) + expectJobResult(Array(42)) } test("run trivial shuffle with fetch failure") { @@ -326,28 +364,32 @@ class DAGSchedulerSuite extends FunSuite val shuffleId = shuffleDep.shuffleId val reduceRdd = makeRdd(2, List(shuffleDep)) - resetExpecting { - expectGetLocations() - expectStage(shuffleMapRdd, List( + val firstStage = interceptStage(shuffleMapRdd) { submitRdd(reduceRdd) } + val secondStage = interceptStage(reduceRdd) { + respondToTaskSet(firstStage, List( (Success, makeMapStatus("hostA", 1)), (Success, makeMapStatus("hostB", 1)) )) + } + resetExpecting { blockManagerMaster.removeExecutor("exec-hostA") - expectStage(reduceRdd, List( + } + whenExecuting { + respondToTaskSet(secondStage, List( (Success, 42), (FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0, 0), null) )) - // partial recompute - expectStage(shuffleMapRdd, List( (Success, makeMapStatus("hostA", 1)) )) - expectStageAnd(reduceRdd, List( (Success, 43) )) { _ => - w { assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === - Array(makeBlockManagerId("hostA"), - makeBlockManagerId("hostB"))) } - } } - whenExecuting { - assert(submitRdd(reduceRdd) === Array(42, 43)) + val thirdStage = interceptStage(shuffleMapRdd) { + scheduler.resubmitFailedStages() } + val fourthStage = interceptStage(reduceRdd) { + respondToTaskSet(thirdStage, List( (Success, makeMapStatus("hostA", 1)) )) + } + assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === + Array(makeBlockManagerId("hostA"), makeBlockManagerId("hostB"))) + 
respondToTaskSet(fourthStage, List( (Success, 43) )) + expectJobResult(Array(42, 43)) } test("ignore late map task completions") { @@ -356,63 +398,64 @@ class DAGSchedulerSuite extends FunSuite val shuffleId = shuffleDep.shuffleId val reduceRdd = makeRdd(2, List(shuffleDep)) + val taskSet = interceptStage(shuffleMapRdd) { submitRdd(reduceRdd) } + val oldGeneration = mapOutputTracker.getGeneration resetExpecting { - expectGetLocations() - expectStageAnd(shuffleMapRdd, List( - (Success, makeMapStatus("hostA", 1)) - )) { taskSet => - val newGeneration = mapOutputTracker.getGeneration + 1 - scheduler.executorLost("exec-hostA") - val noAccum = Map[Long, Any]() - // We rely on the event queue being ordered and increasing the generation number by 1 - // should be ignored for being too old - scheduler.taskEnded(taskSet.tasks(0), Success, makeMapStatus("hostA", 1), noAccum) - // should work because it's a non-failed host - scheduler.taskEnded(taskSet.tasks(0), Success, makeMapStatus("hostB", 1), noAccum) - // should be ignored for being too old - scheduler.taskEnded(taskSet.tasks(0), Success, makeMapStatus("hostA", 1), noAccum) - // should be ignored (not end the stage) because it's too old - scheduler.taskEnded(taskSet.tasks(1), Success, makeMapStatus("hostA", 1), noAccum) - taskSet.tasks(1).generation = newGeneration - scheduler.taskEnded(taskSet.tasks(1), Success, makeMapStatus("hostA", 1), noAccum) - } blockManagerMaster.removeExecutor("exec-hostA") - expectStageAnd(reduceRdd, List( - (Success, 42), (Success, 43) - )) { _ => - w { assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === - Array(makeBlockManagerId("hostB"), makeBlockManagerId("hostA"))) } - } } whenExecuting { - assert(submitRdd(reduceRdd) === Array(42, 43)) + runEvent(ExecutorLost("exec-hostA")) } + val newGeneration = mapOutputTracker.getGeneration + assert(newGeneration > oldGeneration) + val noAccum = Map[Long, Any]() + // We rely on the event queue being ordered and increasing the generation number by 1 + // should be ignored for being too old + runEvent(CompletionEvent(taskSet.tasks(0), Success, makeMapStatus("hostA", 1), noAccum)) + // should work because it's a non-failed host + runEvent(CompletionEvent(taskSet.tasks(0), Success, makeMapStatus("hostB", 1), noAccum)) + // should be ignored for being too old + runEvent(CompletionEvent(taskSet.tasks(0), Success, makeMapStatus("hostA", 1), noAccum)) + taskSet.tasks(1).generation = newGeneration + val secondStage = interceptStage(reduceRdd) { + runEvent(CompletionEvent(taskSet.tasks(1), Success, makeMapStatus("hostA", 1), noAccum)) + } + assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === + Array(makeBlockManagerId("hostB"), makeBlockManagerId("hostA"))) + respondToTaskSet(secondStage, List( (Success, 42), (Success, 43) )) + expectJobResult(Array(42, 43)) } - test("run trivial shuffle with out-of-band failure") { + test("run trivial shuffle with out-of-band failure and retry") { val shuffleMapRdd = makeRdd(2, Nil) val shuffleDep = new ShuffleDependency(shuffleMapRdd, null) val shuffleId = shuffleDep.shuffleId val reduceRdd = makeRdd(1, List(shuffleDep)) + + val firstStage = interceptStage(shuffleMapRdd) { submitRdd(reduceRdd) } resetExpecting { - expectGetLocations() blockManagerMaster.removeExecutor("exec-hostA") - expectStageAnd(shuffleMapRdd, List( - (Success, makeMapStatus("hostA", 1)), - (Success, makeMapStatus("hostB", 1)) - )) { _ => scheduler.executorLost("exec-hostA") } - expectStage(shuffleMapRdd, List( - (Success, 
makeMapStatus("hostC", 1)) - )) - expectStageAnd(reduceRdd, List( (Success, 42) )) { _ => - w { assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === - Array(makeBlockManagerId("hostC"), - makeBlockManagerId("hostB"))) } - } } whenExecuting { - assert(submitRdd(reduceRdd) === Array(42)) + runEvent(ExecutorLost("exec-hostA")) } + // DAGScheduler will immediately resubmit the stage after it appears to have no pending tasks + // rather than marking it is as failed and waiting. + val secondStage = interceptStage(shuffleMapRdd) { + respondToTaskSet(firstStage, List( + (Success, makeMapStatus("hostA", 1)), + (Success, makeMapStatus("hostB", 1)) + )) + } + val thirdStage = interceptStage(reduceRdd) { + respondToTaskSet(secondStage, List( + (Success, makeMapStatus("hostC", 1)) + )) + } + assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === + Array(makeBlockManagerId("hostC"), makeBlockManagerId("hostB"))) + respondToTaskSet(thirdStage, List( (Success, 42) )) + expectJobResult(Array(42)) } test("recursive shuffle failures") { @@ -422,34 +465,42 @@ class DAGSchedulerSuite extends FunSuite val shuffleDepTwo = new ShuffleDependency(shuffleTwoRdd, null) val finalRdd = makeRdd(1, List(shuffleDepTwo)) - resetExpecting { - expectGetLocations() - expectStage(shuffleOneRdd, List( - (Success, makeMapStatus("hostA", 1)), - (Success, makeMapStatus("hostB", 1)) + val firstStage = interceptStage(shuffleOneRdd) { submitRdd(finalRdd) } + val secondStage = interceptStage(shuffleTwoRdd) { + respondToTaskSet(firstStage, List( + (Success, makeMapStatus("hostA", 2)), + (Success, makeMapStatus("hostB", 2)) )) - expectStage(shuffleTwoRdd, List( + } + val thirdStage = interceptStage(finalRdd) { + respondToTaskSet(secondStage, List( (Success, makeMapStatus("hostA", 1)), (Success, makeMapStatus("hostC", 1)) )) + } + resetExpecting { blockManagerMaster.removeExecutor("exec-hostA") - expectStage(finalRdd, List( - (FetchFailed(makeBlockManagerId("hostA"), shuffleDepTwo.shuffleId, 0, 0), null) - )) - // triggers a partial recompute of the first stage, then the second - expectStage(shuffleOneRdd, List( - (Success, makeMapStatus("hostA", 1)) - )) - expectStage(shuffleTwoRdd, List( - (Success, makeMapStatus("hostA", 1)) - )) - expectStage(finalRdd, List( - (Success, 42) - )) } whenExecuting { - assert(submitRdd(finalRdd) === Array(42)) + respondToTaskSet(thirdStage, List( + (FetchFailed(makeBlockManagerId("hostA"), shuffleDepTwo.shuffleId, 0, 0), null) + )) } + val recomputeOne = interceptStage(shuffleOneRdd) { + scheduler.resubmitFailedStages + } + val recomputeTwo = interceptStage(shuffleTwoRdd) { + respondToTaskSet(recomputeOne, List( + (Success, makeMapStatus("hostA", 2)) + )) + } + val finalStage = interceptStage(finalRdd) { + respondToTaskSet(recomputeTwo, List( + (Success, makeMapStatus("hostA", 1)) + )) + } + respondToTaskSet(finalStage, List( (Success, 42) )) + expectJobResult(Array(42)) } test("cached post-shuffle") { @@ -459,35 +510,41 @@ class DAGSchedulerSuite extends FunSuite val shuffleDepTwo = new ShuffleDependency(shuffleTwoRdd, null) val finalRdd = makeRdd(1, List(shuffleDepTwo)) - resetExpecting { - expectGetLocations() - expectStage(shuffleOneRdd, List( + val firstShuffleStage = interceptStage(shuffleOneRdd) { submitRdd(finalRdd) } + cacheLocations(shuffleTwoRdd.id -> 0) = Seq(makeBlockManagerId("hostD")) + cacheLocations(shuffleTwoRdd.id -> 1) = Seq(makeBlockManagerId("hostC")) + val secondShuffleStage = interceptStage(shuffleTwoRdd) { + respondToTaskSet(firstShuffleStage, 
List( + (Success, makeMapStatus("hostA", 2)), + (Success, makeMapStatus("hostB", 2)) + )) + } + val reduceStage = interceptStage(finalRdd) { + respondToTaskSet(secondShuffleStage, List( (Success, makeMapStatus("hostA", 1)), (Success, makeMapStatus("hostB", 1)) )) - expectStageAnd(shuffleTwoRdd, List( - (Success, makeMapStatus("hostA", 1)), - (Success, makeMapStatus("hostC", 1)) - )){ _ => - cacheLocations(shuffleTwoRdd.id -> 0) = Seq(makeBlockManagerId("hostD")) - cacheLocations(shuffleTwoRdd.id -> 1) = Seq(makeBlockManagerId("hostC")) - } + } + resetExpecting { blockManagerMaster.removeExecutor("exec-hostA") - expectStage(finalRdd, List( - (FetchFailed(makeBlockManagerId("hostA"), shuffleDepTwo.shuffleId, 0, 0), null) - )) - // since we have a cached copy of the missing split of shuffleTwoRdd, we shouldn't - // immediately try to rerun shuffleOneRdd: - expectStage(shuffleTwoRdd, List( - (Success, makeMapStatus("hostD", 1)) - ), Some(Seq(List("hostD")))) - expectStage(finalRdd, List( - (Success, 42) - )) } whenExecuting { - assert(submitRdd(finalRdd) === Array(42)) + respondToTaskSet(reduceStage, List( + (FetchFailed(makeBlockManagerId("hostA"), shuffleDepTwo.shuffleId, 0, 0), null) + )) } + // DAGScheduler should notice the cached copy of the second shuffle and try to get it rerun. + val recomputeTwo = interceptStage(shuffleTwoRdd) { + scheduler.resubmitFailedStages() + } + expectTaskSetLocations(recomputeTwo, Seq(Seq("hostD"))) + val finalRetry = interceptStage(finalRdd) { + respondToTaskSet(recomputeTwo, List( + (Success, makeMapStatus("hostD", 1)) + )) + } + respondToTaskSet(finalRetry, List( (Success, 42) )) + expectJobResult(Array(42)) } test("cached post-shuffle but fails") { @@ -497,45 +554,58 @@ class DAGSchedulerSuite extends FunSuite val shuffleDepTwo = new ShuffleDependency(shuffleTwoRdd, null) val finalRdd = makeRdd(1, List(shuffleDepTwo)) - resetExpecting { - expectGetLocations() - expectStage(shuffleOneRdd, List( + val firstShuffleStage = interceptStage(shuffleOneRdd) { submitRdd(finalRdd) } + cacheLocations(shuffleTwoRdd.id -> 0) = Seq(makeBlockManagerId("hostD")) + cacheLocations(shuffleTwoRdd.id -> 1) = Seq(makeBlockManagerId("hostC")) + val secondShuffleStage = interceptStage(shuffleTwoRdd) { + respondToTaskSet(firstShuffleStage, List( + (Success, makeMapStatus("hostA", 2)), + (Success, makeMapStatus("hostB", 2)) + )) + } + val reduceStage = interceptStage(finalRdd) { + respondToTaskSet(secondShuffleStage, List( (Success, makeMapStatus("hostA", 1)), (Success, makeMapStatus("hostB", 1)) )) - expectStageAnd(shuffleTwoRdd, List( - (Success, makeMapStatus("hostA", 1)), - (Success, makeMapStatus("hostC", 1)) - )){ _ => - cacheLocations(shuffleTwoRdd.id -> 0) = Seq(makeBlockManagerId("hostD")) - cacheLocations(shuffleTwoRdd.id -> 1) = Seq(makeBlockManagerId("hostC")) - } + } + resetExpecting { blockManagerMaster.removeExecutor("exec-hostA") - expectStage(finalRdd, List( - (FetchFailed(makeBlockManagerId("hostA"), shuffleDepTwo.shuffleId, 0, 0), null) - )) - // since we have a cached copy of the missing split of shuffleTwoRdd, we shouldn't - // immediately try to rerun shuffleOneRdd: - expectStageAnd(shuffleTwoRdd, List( - (FetchFailed(null, shuffleDepOne.shuffleId, 0, 0), null) - ), Some(Seq(List("hostD")))) { _ => - w { - intercept[FetchFailedException]{ - mapOutputTracker.getServerStatuses(shuffleDepOne.shuffleId, 0) - } - } - cacheLocations.remove(shuffleTwoRdd.id -> 0) - } - // after that fetch failure, we should refetch the cache locations and try to recompute - // the 
whole chain. Note that we will ignore that a fetch failure previously occured on - // this host. - expectStage(shuffleOneRdd, List( (Success, makeMapStatus("hostA", 1)) )) - expectStage(shuffleTwoRdd, List( (Success, makeMapStatus("hostA", 1)) )) - expectStage(finalRdd, List( (Success, 42) )) } whenExecuting { - assert(submitRdd(finalRdd) === Array(42)) + respondToTaskSet(reduceStage, List( + (FetchFailed(makeBlockManagerId("hostA"), shuffleDepTwo.shuffleId, 0, 0), null) + )) } + val recomputeTwoCached = interceptStage(shuffleTwoRdd) { + scheduler.resubmitFailedStages() + } + expectTaskSetLocations(recomputeTwoCached, Seq(Seq("hostD"))) + intercept[FetchFailedException]{ + mapOutputTracker.getServerStatuses(shuffleDepOne.shuffleId, 0) + } + + // Simulate the shuffle input data failing to be cached. + cacheLocations.remove(shuffleTwoRdd.id -> 0) + respondToTaskSet(recomputeTwoCached, List( + (FetchFailed(null, shuffleDepOne.shuffleId, 0, 0), null) + )) + + // After the fetch failure, DAGScheduler should recheck the cache and decide to resubmit + // everything. + val recomputeOne = interceptStage(shuffleOneRdd) { + scheduler.resubmitFailedStages() + } + // We use hostA here to make sure DAGScheduler doesn't think it's still dead. + val recomputeTwoUncached = interceptStage(shuffleTwoRdd) { + respondToTaskSet(recomputeOne, List( (Success, makeMapStatus("hostA", 1)) )) + } + expectTaskSetLocations(recomputeTwoUncached, Seq(Seq[String]())) + val finalRetry = interceptStage(finalRdd) { + respondToTaskSet(recomputeTwoUncached, List( (Success, makeMapStatus("hostA", 1)) )) + + } + respondToTaskSet(finalRetry, List( (Success, 42) )) + expectJobResult(Array(42)) } } - From 7f51458774ce4561f1df3ba9b68704c3f63852f3 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Wed, 30 Jan 2013 09:34:53 -0800 Subject: [PATCH 236/291] Comment at top of DAGSchedulerSuite --- .../scala/spark/scheduler/DAGSchedulerSuite.scala | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala index 89173540d4..c31e2e7064 100644 --- a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala @@ -30,9 +30,22 @@ import spark.TaskEndReason import spark.{FetchFailed, Success} +/** + * Tests for DAGScheduler. These tests directly call the event processing functinos in DAGScheduler + * rather than spawning an event loop thread as happens in the real code. They use EasyMock + * to mock out two classes that DAGScheduler interacts with: TaskScheduler (to which TaskSets are + * submitted) and BlockManagerMaster (from which cache locations are retrieved and to which dead + * host notifications are sent). In addition, tests may check for side effects on a non-mocked + * MapOutputTracker instance. + * + * Tests primarily consist of running DAGScheduler#processEvent and + * DAGScheduler#submitWaitingStages (via test utility functions like runEvent or respondToTaskSet) + * and capturing the resulting TaskSets from the mock TaskScheduler. + */ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar with TimeLimitedTests { - // impose a time limit on this test in case we don't let the job finish. + // impose a time limit on this test in case we don't let the job finish, in which case + // JobWaiter#getResult will hang. 
override val timeLimit = Span(5, Seconds) val sc: SparkContext = new SparkContext("local", "DAGSchedulerSuite") From f7de6978c14a331683e4a341fccd6e4c5e9fa523 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Tue, 29 Jan 2013 14:03:05 -0800 Subject: [PATCH 237/291] Use Mesos ExecutorIDs to hold SlaveIDs. Then we can safely use the Mesos ExecutorID as a Spark ExecutorID. --- .../spark/executor/MesosExecutorBackend.scala | 6 +++- .../mesos/MesosSchedulerBackend.scala | 30 ++++++++++--------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/spark/executor/MesosExecutorBackend.scala index 1ef88075ad..b981b26916 100644 --- a/core/src/main/scala/spark/executor/MesosExecutorBackend.scala +++ b/core/src/main/scala/spark/executor/MesosExecutorBackend.scala @@ -32,7 +32,11 @@ private[spark] class MesosExecutorBackend(executor: Executor) logInfo("Registered with Mesos as executor ID " + executorInfo.getExecutorId.getValue) this.driver = driver val properties = Utils.deserialize[Array[(String, String)]](executorInfo.getData.toByteArray) - executor.initialize(executorInfo.getExecutorId.getValue, slaveInfo.getHostname, properties) + executor.initialize( + slaveInfo.getId.getValue + "-" + executorInfo.getExecutorId.getValue, + slaveInfo.getHostname, + properties + ) } override def launchTask(d: ExecutorDriver, taskInfo: TaskInfo) { diff --git a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala index f3467db86b..eab1c60e0b 100644 --- a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala @@ -51,7 +51,7 @@ private[spark] class MesosSchedulerBackend( val taskIdToSlaveId = new HashMap[Long, String] // An ExecutorInfo for our tasks - var executorInfo: ExecutorInfo = null + var execArgs: Array[Byte] = null override def start() { synchronized { @@ -70,12 +70,11 @@ private[spark] class MesosSchedulerBackend( } }.start() - executorInfo = createExecutorInfo() waitForRegister() } } - def createExecutorInfo(): ExecutorInfo = { + def createExecutorInfo(execId: String): ExecutorInfo = { val sparkHome = sc.getSparkHome().getOrElse(throw new SparkException( "Spark home is not set; set it through the spark.home system " + "property, the SPARK_HOME environment variable or the SparkContext constructor")) @@ -97,7 +96,7 @@ private[spark] class MesosSchedulerBackend( .setEnvironment(environment) .build() ExecutorInfo.newBuilder() - .setExecutorId(ExecutorID.newBuilder().setValue("default").build()) + .setExecutorId(ExecutorID.newBuilder().setValue(execId).build()) .setCommand(command) .setData(ByteString.copyFrom(createExecArg())) .addResources(memory) @@ -109,17 +108,20 @@ private[spark] class MesosSchedulerBackend( * containing all the spark.* system properties in the form of (String, String) pairs. 
*/ private def createExecArg(): Array[Byte] = { - val props = new HashMap[String, String] - val iterator = System.getProperties.entrySet.iterator - while (iterator.hasNext) { - val entry = iterator.next - val (key, value) = (entry.getKey.toString, entry.getValue.toString) - if (key.startsWith("spark.")) { - props(key) = value + if (execArgs == null) { + val props = new HashMap[String, String] + val iterator = System.getProperties.entrySet.iterator + while (iterator.hasNext) { + val entry = iterator.next + val (key, value) = (entry.getKey.toString, entry.getValue.toString) + if (key.startsWith("spark.")) { + props(key) = value + } } + // Serialize the map as an array of (String, String) pairs + execArgs = Utils.serialize(props.toArray) } - // Serialize the map as an array of (String, String) pairs - return Utils.serialize(props.toArray) + return execArgs } override def offerRescinded(d: SchedulerDriver, o: OfferID) {} @@ -216,7 +218,7 @@ private[spark] class MesosSchedulerBackend( return MesosTaskInfo.newBuilder() .setTaskId(taskId) .setSlaveId(SlaveID.newBuilder().setValue(slaveId).build()) - .setExecutor(executorInfo) + .setExecutor(createExecutorInfo(slaveId)) .setName(task.name) .addResources(cpuResource) .setData(ByteString.copyFrom(task.serializedTask)) From 252845d3046034d6e779bd7245d2f876debba8fd Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Wed, 30 Jan 2013 10:38:06 -0800 Subject: [PATCH 238/291] Remove remnants of attempt to use slaveId-executorId in MesosExecutorBackend --- core/src/main/scala/spark/executor/MesosExecutorBackend.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/spark/executor/MesosExecutorBackend.scala index b981b26916..818d6d1dda 100644 --- a/core/src/main/scala/spark/executor/MesosExecutorBackend.scala +++ b/core/src/main/scala/spark/executor/MesosExecutorBackend.scala @@ -33,7 +33,7 @@ private[spark] class MesosExecutorBackend(executor: Executor) this.driver = driver val properties = Utils.deserialize[Array[(String, String)]](executorInfo.getData.toByteArray) executor.initialize( - slaveInfo.getId.getValue + "-" + executorInfo.getExecutorId.getValue, + executorInfo.getExecutorId.getValue, slaveInfo.getHostname, properties ) From 871476d506a2d543482defb923a42a2a01f206ab Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Wed, 30 Jan 2013 16:56:46 -0600 Subject: [PATCH 239/291] Include message and exitStatus if available.
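The log line below assembles its optional fields with Option.map(...).getOrElse(""), so nothing is appended when a field is absent. A small standalone sketch of that idiom (the executor id, state, message, and exitStatus values here are made up):

    // Illustration only: optional fields appended to a log message.
    object LogLineSketch {
      def main(args: Array[String]) {
        val message: Option[String] = Some("Command exited with code 1")
        val exitStatus: Option[Int] = Some(1)
        val line = "Executor job-0/5 finished with state FAILED" +
          message.map(" message " + _).getOrElse("") +
          exitStatus.map(" exitStatus " + _).getOrElse("")
        println(line)   // prints the state plus whichever optional fields are present
      }
    }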
--- core/src/main/scala/spark/deploy/worker/Worker.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/deploy/worker/Worker.scala b/core/src/main/scala/spark/deploy/worker/Worker.scala index 5a83a42daf..8b41620d98 100644 --- a/core/src/main/scala/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/spark/deploy/worker/Worker.scala @@ -134,7 +134,9 @@ private[spark] class Worker( val fullId = jobId + "/" + execId if (ExecutorState.isFinished(state)) { val executor = executors(fullId) - logInfo("Executor " + fullId + " finished with state " + state) + logInfo("Executor " + fullId + " finished with state " + state + + message.map(" message " + _).getOrElse("") + + exitStatus.map(" exitStatus " + _).getOrElse("")) finishedExecutors(fullId) = executor executors -= fullId coresUsed -= executor.cores From 58a7d320d7287f3773976f0efdf2bc2c1474f7f9 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 30 Jan 2013 14:49:18 -0800 Subject: [PATCH 240/291] Inclue packaging and launching pyspark in guide. It's nicer if all the commands you need are made explicit. --- docs/python-programming-guide.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md index a840b9b34b..94311bd102 100644 --- a/docs/python-programming-guide.md +++ b/docs/python-programming-guide.md @@ -67,8 +67,14 @@ The script automatically adds the `pyspark` package to the `PYTHONPATH`. # Interactive Use -The `pyspark` script launches a Python interpreter that is configured to run PySpark jobs. -When run without any input files, `pyspark` launches a shell that can be used explore data interactively, which is a simple way to learn the API: +The `pyspark` script launches a Python interpreter that is configured to run PySpark jobs. To use `pyspark` interactively, first build Spark, then launch it directly from the command line without any options: + +{% highlight bash %} +$ sbt/sbt package +$ ./pyspark +{% endhighlight %} + +The Python shell can be used explore data interactively and is a simple way to learn the API: {% highlight python %} >>> words = sc.textFile("/usr/share/dict/words") From 3f945e3b830c5a7d50acd61c5aabf964f40f7f4b Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 30 Jan 2013 15:04:06 -0800 Subject: [PATCH 241/291] Make module help available in python shell. Also, adds a line in doc explaining how to use. --- docs/python-programming-guide.md | 1 + python/pyspark/shell.py | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md index 94311bd102..4e84d23edf 100644 --- a/docs/python-programming-guide.md +++ b/docs/python-programming-guide.md @@ -80,6 +80,7 @@ The Python shell can be used explore data interactively and is a simple way to l >>> words = sc.textFile("/usr/share/dict/words") >>> words.filter(lambda w: w.startswith("spar")).take(5) [u'spar', u'sparable', u'sparada', u'sparadrap', u'sparagrass'] +>>> help(pyspark) # Show all pyspark functions {% endhighlight %} By default, the `pyspark` shell creates SparkContext that runs jobs locally. diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index f6328c561f..54ff1bf8e7 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -4,6 +4,7 @@ An interactive shell. This file is designed to be launched as a PYTHONSTARTUP script. 
""" import os +import pyspark from pyspark.context import SparkContext From c1df24d0850b0ac89f35f1a47ce6b2fb5b95df0a Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 30 Jan 2013 18:51:14 -0800 Subject: [PATCH 242/291] rename Slaves --> Executor --- core/src/main/scala/spark/SparkContext.scala | 6 +++--- core/src/main/scala/spark/storage/BlockManagerUI.scala | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index a09eca1dd0..39e3555de8 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -468,7 +468,7 @@ class SparkContext( * Return a map from the slave to the max memory available for caching and the remaining * memory available for caching. */ - def getSlavesMemoryStatus: Map[String, (Long, Long)] = { + def getExecutorMemoryStatus: Map[String, (Long, Long)] = { env.blockManager.master.getMemoryStatus.map { case(blockManagerId, mem) => (blockManagerId.ip + ":" + blockManagerId.port, mem) } @@ -479,13 +479,13 @@ class SparkContext( * they take, etc. */ def getRDDStorageInfo : Array[RDDInfo] = { - StorageUtils.rddInfoFromStorageStatus(getSlavesStorageStatus, this) + StorageUtils.rddInfoFromStorageStatus(getExecutorStorageStatus, this) } /** * Return information about blocks stored in all of the slaves */ - def getSlavesStorageStatus : Array[StorageStatus] = { + def getExecutorStorageStatus : Array[StorageStatus] = { env.blockManager.master.getStorageStatus } diff --git a/core/src/main/scala/spark/storage/BlockManagerUI.scala b/core/src/main/scala/spark/storage/BlockManagerUI.scala index 52f6d1b657..9e6721ec17 100644 --- a/core/src/main/scala/spark/storage/BlockManagerUI.scala +++ b/core/src/main/scala/spark/storage/BlockManagerUI.scala @@ -45,7 +45,7 @@ class BlockManagerUI(val actorSystem: ActorSystem, blockManagerMaster: ActorRef, path("") { completeWith { // Request the current storage status from the Master - val storageStatusList = sc.getSlavesStorageStatus + val storageStatusList = sc.getExecutorStorageStatus // Calculate macro-level statistics val maxMem = storageStatusList.map(_.maxMem).reduce(_+_) val remainingMem = storageStatusList.map(_.memRemaining).reduce(_+_) @@ -60,7 +60,7 @@ class BlockManagerUI(val actorSystem: ActorSystem, blockManagerMaster: ActorRef, parameter("id") { id => completeWith { val prefix = "rdd_" + id.toString - val storageStatusList = sc.getSlavesStorageStatus + val storageStatusList = sc.getExecutorStorageStatus val filteredStorageStatusList = StorageUtils. filterStorageStatusByPrefix(storageStatusList, prefix) val rddInfo = StorageUtils.rddInfoFromStorageStatus(filteredStorageStatusList, sc).head From fe3eceab5724bec0103471eb905bb9701120b04a Mon Sep 17 00:00:00 2001 From: Mikhail Bautin Date: Thu, 31 Jan 2013 13:30:41 -0800 Subject: [PATCH 243/291] Remove activation of profiles by default See the discussion at https://github.com/mesos/spark/pull/355 for why default profile activation is a problem. 
--- bagel/pom.xml | 11 ----------- core/pom.xml | 11 ----------- examples/pom.xml | 11 ----------- pom.xml | 11 ----------- repl-bin/pom.xml | 11 ----------- repl/pom.xml | 11 ----------- streaming/pom.xml | 11 ----------- 7 files changed, 77 deletions(-) diff --git a/bagel/pom.xml b/bagel/pom.xml index 5f58347204..a8256a6e8b 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -45,11 +45,6 @@ hadoop1 - - - !hadoopVersion - - org.spark-project @@ -77,12 +72,6 @@ hadoop2 - - - hadoopVersion - 2 - - org.spark-project diff --git a/core/pom.xml b/core/pom.xml index 862d3ec37a..873e8a1d0f 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -163,11 +163,6 @@ hadoop1 - - - !hadoopVersion - - org.apache.hadoop @@ -220,12 +215,6 @@ hadoop2 - - - hadoopVersion - 2 - - org.apache.hadoop diff --git a/examples/pom.xml b/examples/pom.xml index 4d43103475..f43af670c6 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -50,11 +50,6 @@ hadoop1 - - - !hadoopVersion - - org.spark-project @@ -88,12 +83,6 @@ hadoop2 - - - hadoopVersion - 2 - - org.spark-project diff --git a/pom.xml b/pom.xml index 3ea989a082..c6b9012dc6 100644 --- a/pom.xml +++ b/pom.xml @@ -499,11 +499,6 @@ hadoop1 - - - !hadoopVersion - - 1 @@ -521,12 +516,6 @@ hadoop2 - - - hadoopVersion - 2 - - 2 diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index da91c0f3ab..0667b71cc7 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -70,11 +70,6 @@ hadoop1 - - - !hadoopVersion - - hadoop1 @@ -115,12 +110,6 @@ hadoop2 - - - hadoopVersion - 2 - - hadoop2 diff --git a/repl/pom.xml b/repl/pom.xml index 2dc96beaf5..4a296fa630 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -72,11 +72,6 @@ hadoop1 - - - !hadoopVersion - - hadoop1 @@ -128,12 +123,6 @@ hadoop2 - - - hadoopVersion - 2 - - hadoop2 diff --git a/streaming/pom.xml b/streaming/pom.xml index 3dae815e1a..6ee7e59df3 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -83,11 +83,6 @@ hadoop1 - - - !hadoopVersion - - org.spark-project @@ -115,12 +110,6 @@ hadoop2 - - - hadoopVersion - 2 - - org.spark-project From 418e36caa8fcd9a70026ab762ec709732fdebd6b Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Thu, 31 Jan 2013 17:18:33 -0600 Subject: [PATCH 244/291] Add more private declarations. 
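For reference, the scoped access modifiers this patch applies behave as follows; a minimal sketch (the class and method names are illustrative, not taken from the patch):

    package spark.scheduler

    private[spark] class VisibilitySketch {
      private def internalOnly() {}              // callable only from within VisibilitySketch
      private[scheduler] def schedulerLevel() {} // callable from anywhere under spark.scheduler
      private[spark] def sparkLevel() {}         // callable from anywhere under spark
    }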
--- .../main/scala/spark/MapOutputTracker.scala | 2 +- .../spark/deploy/master/MasterWebUI.scala | 22 +++------ .../scala/spark/scheduler/DAGScheduler.scala | 46 ++++++++++--------- .../spark/scheduler/ShuffleMapTask.scala | 3 +- .../scheduler/cluster/ClusterScheduler.scala | 2 +- .../scheduler/cluster/TaskSetManager.scala | 19 ++++---- .../scheduler/local/LocalScheduler.scala | 4 +- .../scala/spark/util/MetadataCleaner.scala | 10 ++-- 8 files changed, 49 insertions(+), 59 deletions(-) diff --git a/core/src/main/scala/spark/MapOutputTracker.scala b/core/src/main/scala/spark/MapOutputTracker.scala index aaf433b324..4735207585 100644 --- a/core/src/main/scala/spark/MapOutputTracker.scala +++ b/core/src/main/scala/spark/MapOutputTracker.scala @@ -170,7 +170,7 @@ private[spark] class MapOutputTracker(actorSystem: ActorSystem, isDriver: Boolea } } - def cleanup(cleanupTime: Long) { + private def cleanup(cleanupTime: Long) { mapStatuses.clearOldValues(cleanupTime) cachedSerializedStatuses.clearOldValues(cleanupTime) } diff --git a/core/src/main/scala/spark/deploy/master/MasterWebUI.scala b/core/src/main/scala/spark/deploy/master/MasterWebUI.scala index a01774f511..529f72e9da 100644 --- a/core/src/main/scala/spark/deploy/master/MasterWebUI.scala +++ b/core/src/main/scala/spark/deploy/master/MasterWebUI.scala @@ -45,13 +45,9 @@ class MasterWebUI(val actorSystem: ActorSystem, master: ActorRef) extends Direct case (jobId, Some(js)) if (js.equalsIgnoreCase("json")) => val future = master ? RequestMasterState val jobInfo = for (masterState <- future.mapTo[MasterState]) yield { - masterState.activeJobs.find(_.id == jobId) match { - case Some(job) => job - case _ => masterState.completedJobs.find(_.id == jobId) match { - case Some(job) => job - case _ => null - } - } + masterState.activeJobs.find(_.id == jobId).getOrElse({ + masterState.completedJobs.find(_.id == jobId).getOrElse(null) + }) } respondWithMediaType(MediaTypes.`application/json`) { ctx => ctx.complete(jobInfo.mapTo[JobInfo]) @@ -61,14 +57,10 @@ class MasterWebUI(val actorSystem: ActorSystem, master: ActorRef) extends Direct val future = master ? 
RequestMasterState future.map { state => val masterState = state.asInstanceOf[MasterState] - - masterState.activeJobs.find(_.id == jobId) match { - case Some(job) => spark.deploy.master.html.job_details.render(job) - case _ => masterState.completedJobs.find(_.id == jobId) match { - case Some(job) => spark.deploy.master.html.job_details.render(job) - case _ => null - } - } + val job = masterState.activeJobs.find(_.id == jobId).getOrElse({ + masterState.completedJobs.find(_.id == jobId).getOrElse(null) + }) + spark.deploy.master.html.job_details.render(job) } } } diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index b130be6a38..14f61f7e87 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -97,7 +97,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with } }.start() - def getCacheLocs(rdd: RDD[_]): Array[List[String]] = { + private def getCacheLocs(rdd: RDD[_]): Array[List[String]] = { if (!cacheLocs.contains(rdd.id)) { val blockIds = rdd.splits.indices.map(index=> "rdd_%d_%d".format(rdd.id, index)).toArray cacheLocs(rdd.id) = blockManagerMaster.getLocations(blockIds).map { @@ -107,7 +107,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with cacheLocs(rdd.id) } - def clearCacheLocs() { + private def clearCacheLocs() { cacheLocs.clear() } @@ -116,7 +116,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with * The priority value passed in will be used if the stage doesn't already exist with * a lower priority (we assume that priorities always increase across jobs for now). */ - def getShuffleMapStage(shuffleDep: ShuffleDependency[_,_], priority: Int): Stage = { + private def getShuffleMapStage(shuffleDep: ShuffleDependency[_,_], priority: Int): Stage = { shuffleToMapStage.get(shuffleDep.shuffleId) match { case Some(stage) => stage case None => @@ -131,11 +131,11 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with * as a result stage for the final RDD used directly in an action. The stage will also be given * the provided priority. */ - def newStage(rdd: RDD[_], shuffleDep: Option[ShuffleDependency[_,_]], priority: Int): Stage = { - // Kind of ugly: need to register RDDs with the cache and map output tracker here - // since we can't do it in the RDD constructor because # of splits is unknown - logInfo("Registering RDD " + rdd.id + " (" + rdd.origin + ")") + private def newStage(rdd: RDD[_], shuffleDep: Option[ShuffleDependency[_,_]], priority: Int): Stage = { if (shuffleDep != None) { + // Kind of ugly: need to register RDDs with the cache and map output tracker here + // since we can't do it in the RDD constructor because # of splits is unknown + logInfo("Registering RDD " + rdd.id + " (" + rdd.origin + ")") mapOutputTracker.registerShuffle(shuffleDep.get.shuffleId, rdd.splits.size) } val id = nextStageId.getAndIncrement() @@ -148,7 +148,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with * Get or create the list of parent stages for a given RDD. The stages will be assigned the * provided priority if they haven't already been created with a lower priority. 
*/ - def getParentStages(rdd: RDD[_], priority: Int): List[Stage] = { + private def getParentStages(rdd: RDD[_], priority: Int): List[Stage] = { val parents = new HashSet[Stage] val visited = new HashSet[RDD[_]] def visit(r: RDD[_]) { @@ -170,7 +170,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with parents.toList } - def getMissingParentStages(stage: Stage): List[Stage] = { + private def getMissingParentStages(stage: Stage): List[Stage] = { val missing = new HashSet[Stage] val visited = new HashSet[RDD[_]] def visit(rdd: RDD[_]) { @@ -241,7 +241,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with * events and responds by launching tasks. This runs in a dedicated thread and receives events * via the eventQueue. */ - def run() { + private def run() { SparkEnv.set(env) while (true) { @@ -326,7 +326,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with * We run the operation in a separate thread just in case it takes a bunch of time, so that we * don't block the DAGScheduler event loop or other concurrent jobs. */ - def runLocally(job: ActiveJob) { + private def runLocally(job: ActiveJob) { logInfo("Computing the requested partition locally") new Thread("Local computation of job " + job.runId) { override def run() { @@ -349,13 +349,14 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with }.start() } - def submitStage(stage: Stage) { + /** Submits stage, but first recursively submits any missing parents. */ + private def submitStage(stage: Stage) { logDebug("submitStage(" + stage + ")") if (!waiting(stage) && !running(stage) && !failed(stage)) { val missing = getMissingParentStages(stage).sortBy(_.id) logDebug("missing: " + missing) if (missing == Nil) { - logInfo("Submitting " + stage + " (" + stage.origin + "), which has no missing parents") + logInfo("Submitting " + stage + " (" + stage.rdd + "), which has no missing parents") submitMissingTasks(stage) running += stage } else { @@ -367,7 +368,8 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with } } - def submitMissingTasks(stage: Stage) { + /** Called when stage's parents are available and we can now do its task. */ + private def submitMissingTasks(stage: Stage) { logDebug("submitMissingTasks(" + stage + ")") // Get our pending tasks and remember them in our pendingTasks entry val myPending = pendingTasks.getOrElseUpdate(stage, new HashSet) @@ -388,7 +390,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with } } if (tasks.size > 0) { - logInfo("Submitting " + tasks.size + " missing tasks from " + stage) + logInfo("Submitting " + tasks.size + " missing tasks from " + stage + " (" + stage.rdd + ")") myPending ++= tasks logDebug("New pending tasks: " + myPending) taskSched.submitTasks( @@ -407,7 +409,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with * Responds to a task finishing. This is called inside the event loop so it assumes that it can * modify the scheduler's internal state. Use taskEnded() to post a task end event from outside. 
*/ - def handleTaskCompletion(event: CompletionEvent) { + private def handleTaskCompletion(event: CompletionEvent) { val task = event.task val stage = idToStage(task.stageId) @@ -492,7 +494,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with waiting --= newlyRunnable running ++= newlyRunnable for (stage <- newlyRunnable.sortBy(_.id)) { - logInfo("Submitting " + stage + " (" + stage.origin + "), which is now runnable") + logInfo("Submitting " + stage + " (" + stage.rdd + "), which is now runnable") submitMissingTasks(stage) } } @@ -541,7 +543,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with * Optionally the generation during which the failure was caught can be passed to avoid allowing * stray fetch failures from possibly retriggering the detection of a node as lost. */ - def handleExecutorLost(execId: String, maybeGeneration: Option[Long] = None) { + private def handleExecutorLost(execId: String, maybeGeneration: Option[Long] = None) { val currentGeneration = maybeGeneration.getOrElse(mapOutputTracker.getGeneration) if (!failedGeneration.contains(execId) || failedGeneration(execId) < currentGeneration) { failedGeneration(execId) = currentGeneration @@ -567,7 +569,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with * Aborts all jobs depending on a particular Stage. This is called in response to a task set * being cancelled by the TaskScheduler. Use taskSetFailed() to inject this event from outside. */ - def abortStage(failedStage: Stage, reason: String) { + private def abortStage(failedStage: Stage, reason: String) { val dependentStages = resultStageToJob.keys.filter(x => stageDependsOn(x, failedStage)).toSeq for (resultStage <- dependentStages) { val job = resultStageToJob(resultStage) @@ -583,7 +585,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with /** * Return true if one of stage's ancestors is target. 
*/ - def stageDependsOn(stage: Stage, target: Stage): Boolean = { + private def stageDependsOn(stage: Stage, target: Stage): Boolean = { if (stage == target) { return true } @@ -610,7 +612,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with visitedRdds.contains(target.rdd) } - def getPreferredLocs(rdd: RDD[_], partition: Int): List[String] = { + private def getPreferredLocs(rdd: RDD[_], partition: Int): List[String] = { // If the partition is cached, return the cache locations val cached = getCacheLocs(rdd)(partition) if (cached != Nil) { @@ -636,7 +638,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with return Nil } - def cleanup(cleanupTime: Long) { + private def cleanup(cleanupTime: Long) { var sizeBefore = idToStage.size idToStage.clearOldValues(cleanupTime) logInfo("idToStage " + sizeBefore + " --> " + idToStage.size) diff --git a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala index 83641a2a84..b701b67c89 100644 --- a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala @@ -127,7 +127,6 @@ private[spark] class ShuffleMapTask( val bucketId = dep.partitioner.getPartition(pair._1) buckets(bucketId) += pair } - val bucketIterators = buckets.map(_.iterator) val compressedSizes = new Array[Byte](numOutputSplits) @@ -135,7 +134,7 @@ private[spark] class ShuffleMapTask( for (i <- 0 until numOutputSplits) { val blockId = "shuffle_" + dep.shuffleId + "_" + partition + "_" + i // Get a Scala iterator from Java map - val iter: Iterator[(Any, Any)] = bucketIterators(i) + val iter: Iterator[(Any, Any)] = buckets(i).iterator val size = blockManager.put(blockId, iter, StorageLevel.DISK_ONLY, false) compressedSizes(i) = MapOutputTracker.compressSize(size) } diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala index 0b4177805b..1e4fbdb874 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala +++ b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala @@ -86,7 +86,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } } - def submitTasks(taskSet: TaskSet) { + override def submitTasks(taskSet: TaskSet) { val tasks = taskSet.tasks logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks") this.synchronized { diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala index 26201ad0dd..3dabdd76b1 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala @@ -17,10 +17,7 @@ import java.nio.ByteBuffer /** * Schedules the tasks within a single TaskSet in the ClusterScheduler. */ -private[spark] class TaskSetManager( - sched: ClusterScheduler, - val taskSet: TaskSet) - extends Logging { +private[spark] class TaskSetManager(sched: ClusterScheduler, val taskSet: TaskSet) extends Logging { // Maximum time to wait to run a task in a preferred location (in ms) val LOCALITY_WAIT = System.getProperty("spark.locality.wait", "3000").toLong @@ -100,7 +97,7 @@ private[spark] class TaskSetManager( } // Add a task to all the pending-task lists that it should be on. 
- def addPendingTask(index: Int) { + private def addPendingTask(index: Int) { val locations = tasks(index).preferredLocations.toSet & sched.hostsAlive if (locations.size == 0) { pendingTasksWithNoPrefs += index @@ -115,7 +112,7 @@ private[spark] class TaskSetManager( // Return the pending tasks list for a given host, or an empty list if // there is no map entry for that host - def getPendingTasksForHost(host: String): ArrayBuffer[Int] = { + private def getPendingTasksForHost(host: String): ArrayBuffer[Int] = { pendingTasksForHost.getOrElse(host, ArrayBuffer()) } @@ -123,7 +120,7 @@ private[spark] class TaskSetManager( // Return None if the list is empty. // This method also cleans up any tasks in the list that have already // been launched, since we want that to happen lazily. - def findTaskFromList(list: ArrayBuffer[Int]): Option[Int] = { + private def findTaskFromList(list: ArrayBuffer[Int]): Option[Int] = { while (!list.isEmpty) { val index = list.last list.trimEnd(1) @@ -137,7 +134,7 @@ private[spark] class TaskSetManager( // Return a speculative task for a given host if any are available. The task should not have an // attempt running on this host, in case the host is slow. In addition, if localOnly is set, the // task must have a preference for this host (or no preferred locations at all). - def findSpeculativeTask(host: String, localOnly: Boolean): Option[Int] = { + private def findSpeculativeTask(host: String, localOnly: Boolean): Option[Int] = { val hostsAlive = sched.hostsAlive speculatableTasks.retain(index => !finished(index)) // Remove finished tasks from set val localTask = speculatableTasks.find { @@ -162,7 +159,7 @@ private[spark] class TaskSetManager( // Dequeue a pending task for a given node and return its index. // If localOnly is set to false, allow non-local tasks as well. - def findTask(host: String, localOnly: Boolean): Option[Int] = { + private def findTask(host: String, localOnly: Boolean): Option[Int] = { val localTask = findTaskFromList(getPendingTasksForHost(host)) if (localTask != None) { return localTask @@ -184,7 +181,7 @@ private[spark] class TaskSetManager( // Does a host count as a preferred location for a task? This is true if // either the task has preferred locations and this host is one, or it has // no preferred locations (in which we still count the launch as preferred). 
- def isPreferredLocation(task: Task[_], host: String): Boolean = { + private def isPreferredLocation(task: Task[_], host: String): Boolean = { val locs = task.preferredLocations return (locs.contains(host) || locs.isEmpty) } @@ -335,7 +332,7 @@ private[spark] class TaskSetManager( if (numFailures(index) > MAX_TASK_FAILURES) { logError("Task %s:%d failed more than %d times; aborting job".format( taskSet.id, index, MAX_TASK_FAILURES)) - abort("Task %d failed more than %d times".format(index, MAX_TASK_FAILURES)) + abort("Task %s:%d failed more than %d times".format(taskSet.id, index, MAX_TASK_FAILURES)) } } } else { diff --git a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala b/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala index 9ff7c02097..482d1cc853 100644 --- a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala +++ b/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala @@ -53,7 +53,7 @@ private[spark] class LocalScheduler(threads: Int, maxFailures: Int, sc: SparkCon } def runTask(task: Task[_], idInJob: Int, attemptId: Int) { - logInfo("Running task " + idInJob) + logInfo("Running " + task) // Set the Spark execution environment for the worker thread SparkEnv.set(env) try { @@ -80,7 +80,7 @@ private[spark] class LocalScheduler(threads: Int, maxFailures: Int, sc: SparkCon val resultToReturn = ser.deserialize[Any](ser.serialize(result)) val accumUpdates = ser.deserialize[collection.mutable.Map[Long, Any]]( ser.serialize(Accumulators.values)) - logInfo("Finished task " + idInJob) + logInfo("Finished " + task) // If the threadpool has not already been shutdown, notify DAGScheduler if (!Thread.currentThread().isInterrupted) diff --git a/core/src/main/scala/spark/util/MetadataCleaner.scala b/core/src/main/scala/spark/util/MetadataCleaner.scala index eaff7ae581..a342d378ff 100644 --- a/core/src/main/scala/spark/util/MetadataCleaner.scala +++ b/core/src/main/scala/spark/util/MetadataCleaner.scala @@ -9,12 +9,12 @@ import spark.Logging * Runs a timer task to periodically clean up metadata (e.g. old files or hashtable entries) */ class MetadataCleaner(name: String, cleanupFunc: (Long) => Unit) extends Logging { - val delaySeconds = MetadataCleaner.getDelaySeconds - val periodSeconds = math.max(10, delaySeconds / 10) - val timer = new Timer(name + " cleanup timer", true) + private val delaySeconds = MetadataCleaner.getDelaySeconds + private val periodSeconds = math.max(10, delaySeconds / 10) + private val timer = new Timer(name + " cleanup timer", true) - val task = new TimerTask { - def run() { + private val task = new TimerTask { + override def run() { try { cleanupFunc(System.currentTimeMillis() - (delaySeconds * 1000)) logInfo("Ran metadata cleaner for " + name) From 782187c21047ee31728bdb173a2b7ee708cef77b Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Thu, 31 Jan 2013 18:27:25 -0600 Subject: [PATCH 245/291] Once we find a split with no block, we don't have to look for more. 
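The change boils down to replacing a per-partition loop with a short-circuiting check. A minimal, self-contained sketch of that pattern follows; the data and the object name are made up for illustration and are not the real DAGScheduler types:

    object ShortCircuitSketch {
      def main(args: Array[String]): Unit = {
        // Stand-in for getCacheLocs(rdd): cached block locations per partition index.
        val locs: Array[Seq[String]] = Array(Seq("hostA"), Nil, Seq("hostB"))
        // `exists` stops scanning at the first uncached partition (index 1 here), so
        // the parent dependencies only need to be examined once, not once per
        // uncached partition as a nested loop would do.
        val atLeastOneMissing = (0 until locs.length).exists(p => locs(p) == Nil)
        println("at least one partition uncached: " + atLeastOneMissing)
      }
    }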
--- .../scala/spark/scheduler/DAGScheduler.scala | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index b130be6a38..b62b25f688 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -177,18 +177,17 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with if (!visited(rdd)) { visited += rdd val locs = getCacheLocs(rdd) - for (p <- 0 until rdd.splits.size) { - if (locs(p) == Nil) { - for (dep <- rdd.dependencies) { - dep match { - case shufDep: ShuffleDependency[_,_] => - val mapStage = getShuffleMapStage(shufDep, stage.priority) - if (!mapStage.isAvailable) { - missing += mapStage - } - case narrowDep: NarrowDependency[_] => - visit(narrowDep.rdd) - } + val atLeastOneMissing = (0 until rdd.splits.size).exists(locs(_) == Nil) + if (atLeastOneMissing) { + for (dep <- rdd.dependencies) { + dep match { + case shufDep: ShuffleDependency[_,_] => + val mapStage = getShuffleMapStage(shufDep, stage.priority) + if (!mapStage.isAvailable) { + missing += mapStage + } + case narrowDep: NarrowDependency[_] => + visit(narrowDep.rdd) } } } From 5b0fc265c2f2ce461d61904c2a4e6e47b24d2bbe Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Thu, 31 Jan 2013 17:48:39 -0800 Subject: [PATCH 246/291] Changed PartitionPruningRDD's split to make sure it returns the correct split index. --- core/src/main/scala/spark/Dependency.scala | 8 ++++++++ core/src/main/scala/spark/rdd/PartitionPruningRDD.scala | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/Dependency.scala b/core/src/main/scala/spark/Dependency.scala index 647aee6eb5..827eac850a 100644 --- a/core/src/main/scala/spark/Dependency.scala +++ b/core/src/main/scala/spark/Dependency.scala @@ -72,6 +72,14 @@ class PruneDependency[T](rdd: RDD[T], @transient partitionFilterFunc: Int => Boo @transient val partitions: Array[Split] = rdd.splits.filter(s => partitionFilterFunc(s.index)) + .zipWithIndex + .map { case(split, idx) => new PruneDependency.PartitionPruningRDDSplit(idx, split) : Split } override def getParents(partitionId: Int) = List(partitions(partitionId).index) } + +object PruneDependency { + class PartitionPruningRDDSplit(idx: Int, val parentSplit: Split) extends Split { + override val index = idx + } +} diff --git a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala index b8482338c6..0989b149e1 100644 --- a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala +++ b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala @@ -2,6 +2,7 @@ package spark.rdd import spark.{PruneDependency, RDD, SparkEnv, Split, TaskContext} + /** * A RDD used to prune RDD partitions/splits so we can avoid launching tasks on * all partitions. 
An example use case: If we know the RDD is partitioned by range, @@ -15,7 +16,8 @@ class PartitionPruningRDD[T: ClassManifest]( @transient partitionFilterFunc: Int => Boolean) extends RDD[T](prev.context, List(new PruneDependency(prev, partitionFilterFunc))) { - override def compute(split: Split, context: TaskContext) = firstParent[T].iterator(split, context) + override def compute(split: Split, context: TaskContext) = firstParent[T].iterator( + split.asInstanceOf[PruneDependency.PartitionPruningRDDSplit].parentSplit, context) override protected def getSplits = getDependencies.head.asInstanceOf[PruneDependency[T]].partitions From 6289d9654e32fc92418d41cc6e32fee30f85c833 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Thu, 31 Jan 2013 17:49:36 -0800 Subject: [PATCH 247/291] Removed the TODO comment from PartitionPruningRDD. --- core/src/main/scala/spark/rdd/PartitionPruningRDD.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala index 0989b149e1..3756870fac 100644 --- a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala +++ b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala @@ -8,8 +8,6 @@ import spark.{PruneDependency, RDD, SparkEnv, Split, TaskContext} * all partitions. An example use case: If we know the RDD is partitioned by range, * and the execution DAG has a filter on the key, we can avoid launching tasks * on partitions that don't have the range covering the key. - * - * TODO: This currently doesn't give partition IDs properly! */ class PartitionPruningRDD[T: ClassManifest]( @transient prev: RDD[T], From 3446d5c8d6b385106ac85e46320d92faa8efb4e6 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 31 Jan 2013 18:02:28 -0800 Subject: [PATCH 248/291] SPARK-673: Capture and re-throw Python exceptions This patch alters the Python <-> executor protocol to pass on exception data when they occur in user Python code. 
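For reference, a rough, self-contained sketch of the framing this protocol relies on: a positive int gives the length of a data record, -2 signals a length-prefixed traceback from the worker, and -1 ends the data section. The object and method names below are invented for illustration and are not the actual PythonRDD API:

    import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

    object FramingSketch {
      // Reads one frame: Right(bytes) for a normal record, Left(traceback) when the
      // worker signalled a failure with the -2 marker, Right(empty) at end of data.
      def readOne(in: DataInputStream): Either[String, Array[Byte]] = {
        in.readInt() match {
          case length if length > 0 =>
            val obj = new Array[Byte](length)
            in.readFully(obj)
            Right(obj)
          case -2 =>
            val len = in.readInt()
            val trace = new Array[Byte](len)
            in.readFully(trace)
            Left(new String(trace, "UTF-8"))
          case -1 =>
            Right(new Array[Byte](0))   // end of data; accumulator updates would follow
        }
      }

      def main(args: Array[String]): Unit = {
        // Writer side, standing in for the Python worker: one record, then -2 plus a traceback.
        val buf = new ByteArrayOutputStream()
        val out = new DataOutputStream(buf)
        val rec = "a result".getBytes("UTF-8")
        out.writeInt(rec.length); out.write(rec)
        val tb = "Traceback (most recent call last): ...".getBytes("UTF-8")
        out.writeInt(-2); out.writeInt(tb.length); out.write(tb)

        // Reader side, standing in for the JVM executor: dispatch on the framing int.
        val in = new DataInputStream(new ByteArrayInputStream(buf.toByteArray))
        for (_ <- 1 to 2) readOne(in) match {
          case Right(bytes) => println("record: " + new String(bytes, "UTF-8"))
          case Left(trace)  => println("worker exception (would be rethrown):\n" + trace)
        }
      }
    }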
--- .../scala/spark/api/python/PythonRDD.scala | 40 ++++++++++++------- python/pyspark/worker.py | 10 ++++- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index f43a152ca7..6b9ef62529 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -103,21 +103,30 @@ private[spark] class PythonRDD[T: ClassManifest]( private def read(): Array[Byte] = { try { - val length = stream.readInt() - if (length != -1) { - val obj = new Array[Byte](length) - stream.readFully(obj) - obj - } else { - // We've finished the data section of the output, but we can still read some - // accumulator updates; let's do that, breaking when we get EOFException - while (true) { - val len2 = stream.readInt() - val update = new Array[Byte](len2) - stream.readFully(update) - accumulator += Collections.singletonList(update) + stream.readInt() match { + case length if length > 0 => { + val obj = new Array[Byte](length) + stream.readFully(obj) + obj } - new Array[Byte](0) + case -2 => { + // Signals that an exception has been thrown in python + val exLength = stream.readInt() + val obj = new Array[Byte](exLength) + stream.readFully(obj) + throw new PythonException(new String(obj)) + } + case -1 => { + // We've finished the data section of the output, but we can still read some + // accumulator updates; let's do that, breaking when we get EOFException + while (true) { + val len2 = stream.readInt() + val update = new Array[Byte](len2) + stream.readFully(update) + accumulator += Collections.singletonList(update) + } + new Array[Byte](0) + } } } catch { case eof: EOFException => { @@ -140,6 +149,9 @@ private[spark] class PythonRDD[T: ClassManifest]( val asJavaRDD : JavaRDD[Array[Byte]] = JavaRDD.fromRDD(this) } +/** Thrown for exceptions in user Python code. */ +private class PythonException(msg: String) extends Exception(msg) + /** * Form an RDD[(Array[Byte], Array[Byte])] from key-value pairs returned from Python. * This is used by PySpark's shuffle operations. diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index d33d6dd15f..9622e0cfe4 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -2,6 +2,7 @@ Worker that receives input from Piped RDD. """ import sys +import traceback from base64 import standard_b64decode # CloudPickler needs to be imported so that depicklers are registered using the # copy_reg module. 
@@ -40,8 +41,13 @@ def main(): else: dumps = dump_pickle iterator = read_from_pickle_file(sys.stdin) - for obj in func(split_index, iterator): - write_with_length(dumps(obj), old_stdout) + try: + for obj in func(split_index, iterator): + write_with_length(dumps(obj), old_stdout) + except Exception as e: + write_int(-2, old_stdout) + write_with_length(traceback.format_exc(), old_stdout) + sys.exit(-1) # Mark the beginning of the accumulators section of the output write_int(-1, old_stdout) for aid, accum in _accumulatorRegistry.items(): From c33f0ef41a1865de2bae01b52b860650d3734da4 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 31 Jan 2013 21:50:02 -0800 Subject: [PATCH 249/291] Some style cleanup --- core/src/main/scala/spark/api/python/PythonRDD.scala | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 6b9ef62529..23e3149248 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -104,19 +104,17 @@ private[spark] class PythonRDD[T: ClassManifest]( private def read(): Array[Byte] = { try { stream.readInt() match { - case length if length > 0 => { + case length if length > 0 => val obj = new Array[Byte](length) stream.readFully(obj) obj - } - case -2 => { + case -2 => // Signals that an exception has been thrown in python val exLength = stream.readInt() val obj = new Array[Byte](exLength) stream.readFully(obj) throw new PythonException(new String(obj)) - } - case -1 => { + case -1 => // We've finished the data section of the output, but we can still read some // accumulator updates; let's do that, breaking when we get EOFException while (true) { @@ -124,9 +122,8 @@ private[spark] class PythonRDD[T: ClassManifest]( val update = new Array[Byte](len2) stream.readFully(update) accumulator += Collections.singletonList(update) + new Array[Byte](0) } - new Array[Byte](0) - } } } catch { case eof: EOFException => { From 39ab83e9577a5449fb0d6ef944dffc0d7cd00b4a Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 31 Jan 2013 21:52:52 -0800 Subject: [PATCH 250/291] Small fix from last commit --- core/src/main/scala/spark/api/python/PythonRDD.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 23e3149248..39758e94f4 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -122,8 +122,8 @@ private[spark] class PythonRDD[T: ClassManifest]( val update = new Array[Byte](len2) stream.readFully(update) accumulator += Collections.singletonList(update) - new Array[Byte](0) } + new Array[Byte](0) } } catch { case eof: EOFException => { From f9af9cee6fed9c6af896fb92556ad4f48c7f8e64 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Fri, 1 Feb 2013 00:02:46 -0800 Subject: [PATCH 251/291] Moved PruneDependency into PartitionPruningRDD.scala. 
--- core/src/main/scala/spark/Dependency.scala | 22 ---------------- .../scala/spark/rdd/PartitionPruningRDD.scala | 26 ++++++++++++++++--- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/core/src/main/scala/spark/Dependency.scala b/core/src/main/scala/spark/Dependency.scala index 827eac850a..5eea907322 100644 --- a/core/src/main/scala/spark/Dependency.scala +++ b/core/src/main/scala/spark/Dependency.scala @@ -61,25 +61,3 @@ class RangeDependency[T](rdd: RDD[T], inStart: Int, outStart: Int, length: Int) } } } - - -/** - * Represents a dependency between the PartitionPruningRDD and its parent. In this - * case, the child RDD contains a subset of partitions of the parents'. - */ -class PruneDependency[T](rdd: RDD[T], @transient partitionFilterFunc: Int => Boolean) - extends NarrowDependency[T](rdd) { - - @transient - val partitions: Array[Split] = rdd.splits.filter(s => partitionFilterFunc(s.index)) - .zipWithIndex - .map { case(split, idx) => new PruneDependency.PartitionPruningRDDSplit(idx, split) : Split } - - override def getParents(partitionId: Int) = List(partitions(partitionId).index) -} - -object PruneDependency { - class PartitionPruningRDDSplit(idx: Int, val parentSplit: Split) extends Split { - override val index = idx - } -} diff --git a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala index 3756870fac..a50ce75171 100644 --- a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala +++ b/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala @@ -1,6 +1,26 @@ package spark.rdd -import spark.{PruneDependency, RDD, SparkEnv, Split, TaskContext} +import spark.{NarrowDependency, RDD, SparkEnv, Split, TaskContext} + + +class PartitionPruningRDDSplit(idx: Int, val parentSplit: Split) extends Split { + override val index = idx +} + + +/** + * Represents a dependency between the PartitionPruningRDD and its parent. In this + * case, the child RDD contains a subset of partitions of the parents'. 
+ */ +class PruneDependency[T](rdd: RDD[T], @transient partitionFilterFunc: Int => Boolean) + extends NarrowDependency[T](rdd) { + + @transient + val partitions: Array[Split] = rdd.splits.filter(s => partitionFilterFunc(s.index)) + .zipWithIndex.map { case(split, idx) => new PartitionPruningRDDSplit(idx, split) : Split } + + override def getParents(partitionId: Int) = List(partitions(partitionId).index) +} /** @@ -15,10 +35,8 @@ class PartitionPruningRDD[T: ClassManifest]( extends RDD[T](prev.context, List(new PruneDependency(prev, partitionFilterFunc))) { override def compute(split: Split, context: TaskContext) = firstParent[T].iterator( - split.asInstanceOf[PruneDependency.PartitionPruningRDDSplit].parentSplit, context) + split.asInstanceOf[PartitionPruningRDDSplit].parentSplit, context) override protected def getSplits = getDependencies.head.asInstanceOf[PruneDependency[T]].partitions - - override val partitioner = firstParent[T].partitioner } From f127f2ae76692b189d86b5a47293579d5657c6d5 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 1 Feb 2013 00:20:49 -0800 Subject: [PATCH 252/291] fixup merge (master -> driver renaming) --- core/src/main/scala/spark/storage/BlockManagerMaster.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/spark/storage/BlockManagerMaster.scala index 99324445ca..0372cb080a 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMaster.scala @@ -118,7 +118,7 @@ private[spark] class BlockManagerMaster( } def getStorageStatus: Array[StorageStatus] = { - askMasterWithRetry[ArrayBuffer[StorageStatus]](GetStorageStatus).toArray + askDriverWithReply[ArrayBuffer[StorageStatus]](GetStorageStatus).toArray } /** Stop the driver actor, called only on the Spark driver node */ From 8a0a5ed53353ad6aa5656eb729d55ca7af2ab096 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 1 Feb 2013 00:23:38 -0800 Subject: [PATCH 253/291] track total partitions, in addition to cached partitions; use scala string formatting --- core/src/main/scala/spark/storage/StorageUtils.scala | 10 ++++------ core/src/main/twirl/spark/storage/rdd.scala.html | 6 +++++- core/src/main/twirl/spark/storage/rdd_table.scala.html | 6 ++++-- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/core/src/main/scala/spark/storage/StorageUtils.scala b/core/src/main/scala/spark/storage/StorageUtils.scala index ce7c067eea..5367b74bb6 100644 --- a/core/src/main/scala/spark/storage/StorageUtils.scala +++ b/core/src/main/scala/spark/storage/StorageUtils.scala @@ -22,12 +22,11 @@ case class StorageStatus(blockManagerId: BlockManagerId, maxMem: Long, } case class RDDInfo(id: Int, name: String, storageLevel: StorageLevel, - numPartitions: Int, memSize: Long, diskSize: Long) { + numCachedPartitions: Int, numPartitions: Int, memSize: Long, diskSize: Long) { override def toString = { import Utils.memoryBytesToString - import java.lang.{Integer => JInt} - String.format("RDD \"%s\" (%d) Storage: %s; Partitions: %d; MemorySize: %s; DiskSize: %s", name, id.asInstanceOf[JInt], - storageLevel.toString, numPartitions.asInstanceOf[JInt], memoryBytesToString(memSize), memoryBytesToString(diskSize)) + "RDD \"%s\" (%d) Storage: %s; CachedPartitions: %d; TotalPartitions: %d; MemorySize: %s; DiskSize: %s".format(name, id, + storageLevel.toString, numCachedPartitions, numPartitions, memoryBytesToString(memSize), memoryBytesToString(diskSize)) } } @@ -65,9 
+64,8 @@ object StorageUtils { val rdd = sc.persistentRdds(rddId) val rddName = Option(rdd.name).getOrElse(rddKey) val rddStorageLevel = rdd.getStorageLevel - //TODO get total number of partitions in rdd - RDDInfo(rddId, rddName, rddStorageLevel, rddBlocks.length, memSize, diskSize) + RDDInfo(rddId, rddName, rddStorageLevel, rddBlocks.length, rdd.splits.size, memSize, diskSize) }.toArray } diff --git a/core/src/main/twirl/spark/storage/rdd.scala.html b/core/src/main/twirl/spark/storage/rdd.scala.html index ac7f8c981f..d85addeb17 100644 --- a/core/src/main/twirl/spark/storage/rdd.scala.html +++ b/core/src/main/twirl/spark/storage/rdd.scala.html @@ -11,7 +11,11 @@ Storage Level: @(rddInfo.storageLevel.description)
  • - Partitions: + Cached Partitions: + @(rddInfo.numCachedPartitions) +
  • +
  • + Total Partitions: @(rddInfo.numPartitions)
  • diff --git a/core/src/main/twirl/spark/storage/rdd_table.scala.html b/core/src/main/twirl/spark/storage/rdd_table.scala.html index af801cf229..a51e64aed0 100644 --- a/core/src/main/twirl/spark/storage/rdd_table.scala.html +++ b/core/src/main/twirl/spark/storage/rdd_table.scala.html @@ -6,7 +6,8 @@ RDD Name Storage Level - Partitions + Cached Partitions + Fraction Partitions Cached Size in Memory Size on Disk @@ -21,7 +22,8 @@ @(rdd.storageLevel.description) - @rdd.numPartitions + @rdd.numCachedPartitions + @(rdd.numCachedPartitions / rdd.numPartitions.toDouble) @{Utils.memoryBytesToString(rdd.memSize)} @{Utils.memoryBytesToString(rdd.diskSize)} From 57b64d0d1902eb51bf79f595626c2b9f80a9d1e2 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 1 Feb 2013 00:25:19 -0800 Subject: [PATCH 254/291] Fix stdout redirection in PySpark. --- python/pyspark/tests.py | 9 +++++++++ python/pyspark/worker.py | 5 +++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index df7235756d..52297d44e6 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -108,5 +108,14 @@ class TestAddFile(PySparkTestCase): self.assertEqual("Hello World!", UserClass().hello()) +class TestIO(PySparkTestCase): + + def test_stdout_redirection(self): + import subprocess + def func(x): + subprocess.check_call('ls', shell=True) + self.sc.parallelize([1]).foreach(func) + + if __name__ == "__main__": unittest.main() diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 9622e0cfe4..812e7a9da5 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -1,6 +1,7 @@ """ Worker that receives input from Piped RDD. """ +import os import sys import traceback from base64 import standard_b64decode @@ -15,8 +16,8 @@ from pyspark.serializers import write_with_length, read_with_length, write_int, # Redirect stdout to stderr so that users must return values from functions. -old_stdout = sys.stdout -sys.stdout = sys.stderr +old_stdout = os.fdopen(os.dup(1), 'w') +os.dup2(2, 1) def load_obj(): From 59c57e48dfb362923610785b230d5b3b56c620c3 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Fri, 1 Feb 2013 10:34:02 -0600 Subject: [PATCH 255/291] Stop BlockManagers metadataCleaner. 
--- core/src/main/scala/spark/storage/BlockManager.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/main/scala/spark/storage/BlockManager.scala b/core/src/main/scala/spark/storage/BlockManager.scala index c61fd75c2b..9893e9625d 100644 --- a/core/src/main/scala/spark/storage/BlockManager.scala +++ b/core/src/main/scala/spark/storage/BlockManager.scala @@ -950,6 +950,7 @@ class BlockManager( blockInfo.clear() memoryStore.clear() diskStore.clear() + metadataCleaner.cancel() logInfo("BlockManager stopped") } } From c6190067ae40cf457b7f2e58619904b6fd2b1cb6 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 1 Feb 2013 09:55:25 -0800 Subject: [PATCH 256/291] remove unneeded (and unused) filter on block info --- core/src/main/scala/spark/storage/StorageUtils.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/main/scala/spark/storage/StorageUtils.scala b/core/src/main/scala/spark/storage/StorageUtils.scala index 5367b74bb6..5f72b67b2c 100644 --- a/core/src/main/scala/spark/storage/StorageUtils.scala +++ b/core/src/main/scala/spark/storage/StorageUtils.scala @@ -43,8 +43,6 @@ object StorageUtils { /* Given a list of BlockStatus objets, returns information for each RDD */ def rddInfoFromBlockStatusList(infos: Map[String, BlockStatus], sc: SparkContext) : Array[RDDInfo] = { - // Find all RDD Blocks (ignore broadcast variables) - val rddBlocks = infos.filterKeys(_.startsWith("rdd")) // Group by rddId, ignore the partition name val groupedRddBlocks = infos.groupBy { case(k, v) => From 9cc6ff9c4e7eec2d62261fc166ad2ebade148752 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 1 Feb 2013 11:09:56 -0800 Subject: [PATCH 257/291] Do not launch JavaGateways on workers (SPARK-674). The problem was that the gateway was being initialized whenever the pyspark.context module was loaded. The fix uses lazy initialization that occurs only when SparkContext instances are actually constructed. I also made the gateway and jvm variables private. This change results in ~3-4x performance improvement when running the PySpark unit tests. --- python/pyspark/context.py | 27 +++++++++++++++++---------- python/pyspark/files.py | 2 +- python/pyspark/rdd.py | 12 ++++++------ python/pyspark/tests.py | 2 +- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 783e3dc148..ba6896dda3 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -24,11 +24,10 @@ class SparkContext(object): broadcast variables on that cluster. 
""" - gateway = launch_gateway() - jvm = gateway.jvm - _readRDDFromPickleFile = jvm.PythonRDD.readRDDFromPickleFile - _writeIteratorToPickleFile = jvm.PythonRDD.writeIteratorToPickleFile - _takePartition = jvm.PythonRDD.takePartition + _gateway = None + _jvm = None + _writeIteratorToPickleFile = None + _takePartition = None _next_accum_id = 0 _active_spark_context = None _lock = Lock() @@ -56,6 +55,13 @@ class SparkContext(object): raise ValueError("Cannot run multiple SparkContexts at once") else: SparkContext._active_spark_context = self + if not SparkContext._gateway: + SparkContext._gateway = launch_gateway() + SparkContext._jvm = SparkContext._gateway.jvm + SparkContext._writeIteratorToPickleFile = \ + SparkContext._jvm.PythonRDD.writeIteratorToPickleFile + SparkContext._takePartition = \ + SparkContext._jvm.PythonRDD.takePartition self.master = master self.jobName = jobName self.sparkHome = sparkHome or None # None becomes null in Py4J @@ -63,8 +69,8 @@ class SparkContext(object): self.batchSize = batchSize # -1 represents a unlimited batch size # Create the Java SparkContext through Py4J - empty_string_array = self.gateway.new_array(self.jvm.String, 0) - self._jsc = self.jvm.JavaSparkContext(master, jobName, sparkHome, + empty_string_array = self._gateway.new_array(self._jvm.String, 0) + self._jsc = self._jvm.JavaSparkContext(master, jobName, sparkHome, empty_string_array) # Create a single Accumulator in Java that we'll send all our updates through; @@ -72,8 +78,8 @@ class SparkContext(object): self._accumulatorServer = accumulators._start_update_server() (host, port) = self._accumulatorServer.server_address self._javaAccumulator = self._jsc.accumulator( - self.jvm.java.util.ArrayList(), - self.jvm.PythonAccumulatorParam(host, port)) + self._jvm.java.util.ArrayList(), + self._jvm.PythonAccumulatorParam(host, port)) self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python') # Broadcast's __reduce__ method stores Broadcast instances here. 
@@ -127,7 +133,8 @@ class SparkContext(object): for x in c: write_with_length(dump_pickle(x), tempFile) tempFile.close() - jrdd = self._readRDDFromPickleFile(self._jsc, tempFile.name, numSlices) + readRDDFromPickleFile = self._jvm.PythonRDD.readRDDFromPickleFile + jrdd = readRDDFromPickleFile(self._jsc, tempFile.name, numSlices) return RDD(jrdd, self) def textFile(self, name, minSplits=None): diff --git a/python/pyspark/files.py b/python/pyspark/files.py index 98f6a399cc..001b7a28b6 100644 --- a/python/pyspark/files.py +++ b/python/pyspark/files.py @@ -35,4 +35,4 @@ class SparkFiles(object): return cls._root_directory else: # This will have to change if we support multiple SparkContexts: - return cls._sc.jvm.spark.SparkFiles.getRootDirectory() + return cls._sc._jvm.spark.SparkFiles.getRootDirectory() diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index d53355a8f1..d7cad2f372 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -407,7 +407,7 @@ class RDD(object): return (str(x).encode("utf-8") for x in iterator) keyed = PipelinedRDD(self, func) keyed._bypass_serializer = True - keyed._jrdd.map(self.ctx.jvm.BytesToString()).saveAsTextFile(path) + keyed._jrdd.map(self.ctx._jvm.BytesToString()).saveAsTextFile(path) # Pair functions @@ -550,8 +550,8 @@ class RDD(object): yield dump_pickle(Batch(items)) keyed = PipelinedRDD(self, add_shuffle_key) keyed._bypass_serializer = True - pairRDD = self.ctx.jvm.PairwiseRDD(keyed._jrdd.rdd()).asJavaPairRDD() - partitioner = self.ctx.jvm.PythonPartitioner(numSplits, + pairRDD = self.ctx._jvm.PairwiseRDD(keyed._jrdd.rdd()).asJavaPairRDD() + partitioner = self.ctx._jvm.PythonPartitioner(numSplits, id(partitionFunc)) jrdd = pairRDD.partitionBy(partitioner).values() rdd = RDD(jrdd, self.ctx) @@ -730,13 +730,13 @@ class PipelinedRDD(RDD): pipe_command = ' '.join(b64enc(cloudpickle.dumps(f)) for f in cmds) broadcast_vars = ListConverter().convert( [x._jbroadcast for x in self.ctx._pickled_broadcast_vars], - self.ctx.gateway._gateway_client) + self.ctx._gateway._gateway_client) self.ctx._pickled_broadcast_vars.clear() class_manifest = self._prev_jrdd.classManifest() env = copy.copy(self.ctx.environment) env['PYTHONPATH'] = os.environ.get("PYTHONPATH", "") - env = MapConverter().convert(env, self.ctx.gateway._gateway_client) - python_rdd = self.ctx.jvm.PythonRDD(self._prev_jrdd.rdd(), + env = MapConverter().convert(env, self.ctx._gateway._gateway_client) + python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(), pipe_command, env, self.preservesPartitioning, self.ctx.pythonExec, broadcast_vars, self.ctx._javaAccumulator, class_manifest) self._jrdd_val = python_rdd.asJavaRDD() diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 52297d44e6..6a1962d267 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -26,7 +26,7 @@ class PySparkTestCase(unittest.TestCase): sys.path = self._old_sys_path # To avoid Akka rebinding to the same port, since it doesn't unbind # immediately on shutdown - self.sc.jvm.System.clearProperty("spark.driver.port") + self.sc._jvm.System.clearProperty("spark.driver.port") class TestCheckpoint(PySparkTestCase): From e211f405bcb3cf02c3ae589cf81d9c9dfc70bc03 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 1 Feb 2013 11:48:11 -0800 Subject: [PATCH 258/291] Use spark.local.dir for PySpark temp files (SPARK-580). 
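The underlying idea is to resolve scratch space from the spark.local.dir setting instead of the system default temp directory. A rough sketch, assuming the setting is a single path (the real property may list several directories, and the helper name here is illustrative rather than the actual spark.Utils API):

    import java.io.File
    import java.util.UUID

    object LocalDirSketch {
      def createTempDir(): File = {
        // Fall back to java.io.tmpdir only when spark.local.dir is not set.
        val root = System.getProperty("spark.local.dir", System.getProperty("java.io.tmpdir"))
        val dir = new File(root, "spark-tmp-" + UUID.randomUUID().toString)
        if (!dir.mkdirs() && !dir.isDirectory) {
          throw new java.io.IOException("Failed to create temp dir " + dir)
        }
        dir.deleteOnExit()
        dir
      }

      def main(args: Array[String]): Unit = {
        // In a real deployment spark.local.dir would point at fast local disks;
        // here it is set to the system temp dir just so the demo runs anywhere.
        System.setProperty("spark.local.dir", System.getProperty("java.io.tmpdir"))
        println("scratch files would go under: " + createTempDir())
      }
    }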
--- python/pyspark/context.py | 12 ++++++++---- python/pyspark/rdd.py | 7 +------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index ba6896dda3..6831f9b7f8 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -1,8 +1,6 @@ import os -import atexit import shutil import sys -import tempfile from threading import Lock from tempfile import NamedTemporaryFile @@ -94,6 +92,11 @@ class SparkContext(object): SparkFiles._sc = self sys.path.append(SparkFiles.getRootDirectory()) + # Create a temporary directory inside spark.local.dir: + local_dir = self._jvm.spark.Utils.getLocalDir() + self._temp_dir = \ + self._jvm.spark.Utils.createTempDir(local_dir).getAbsolutePath() + @property def defaultParallelism(self): """ @@ -126,8 +129,7 @@ class SparkContext(object): # Calling the Java parallelize() method with an ArrayList is too slow, # because it sends O(n) Py4J commands. As an alternative, serialized # objects are written to a file and loaded through textFile(). - tempFile = NamedTemporaryFile(delete=False) - atexit.register(lambda: os.unlink(tempFile.name)) + tempFile = NamedTemporaryFile(delete=False, dir=self._temp_dir) if self.batchSize != 1: c = batched(c, self.batchSize) for x in c: @@ -247,7 +249,9 @@ class SparkContext(object): def _test(): + import atexit import doctest + import tempfile globs = globals().copy() globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) globs['tempdir'] = tempfile.mkdtemp() diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index d7cad2f372..41ea6e6e14 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -1,4 +1,3 @@ -import atexit from base64 import standard_b64encode as b64enc import copy from collections import defaultdict @@ -264,12 +263,8 @@ class RDD(object): # Transferring lots of data through Py4J can be slow because # socket.readline() is inefficient. Instead, we'll dump the data to a # file and read it back. 
- tempFile = NamedTemporaryFile(delete=False) + tempFile = NamedTemporaryFile(delete=False, dir=self.ctx._temp_dir) tempFile.close() - def clean_up_file(): - try: os.unlink(tempFile.name) - except: pass - atexit.register(clean_up_file) self.ctx._writeIteratorToPickleFile(iterator, tempFile.name) # Read the data into Python and deserialize it: with open(tempFile.name, 'rb') as tempFile: From 9970926ede0d5a719b8f22e97977804d3c811e97 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Fri, 1 Feb 2013 14:07:34 -0800 Subject: [PATCH 259/291] formatting --- core/src/main/scala/spark/RDD.scala | 2 +- core/src/main/scala/spark/scheduler/ShuffleMapTask.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 210404d540..010e61dfdc 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -385,7 +385,7 @@ abstract class RDD[T: ClassManifest]( val reducePartition: Iterator[T] => Option[T] = iter => { if (iter.hasNext) { Some(iter.reduceLeft(cleanF)) - }else { + } else { None } } diff --git a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala index 83641a2a84..20f2c9e489 100644 --- a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala @@ -32,7 +32,7 @@ private[spark] object ShuffleMapTask { return old } else { val out = new ByteArrayOutputStream - val ser = SparkEnv.get.closureSerializer.newInstance + val ser = SparkEnv.get.closureSerializer.newInstance() val objOut = ser.serializeStream(new GZIPOutputStream(out)) objOut.writeObject(rdd) objOut.writeObject(dep) @@ -48,7 +48,7 @@ private[spark] object ShuffleMapTask { synchronized { val loader = Thread.currentThread.getContextClassLoader val in = new GZIPInputStream(new ByteArrayInputStream(bytes)) - val ser = SparkEnv.get.closureSerializer.newInstance + val ser = SparkEnv.get.closureSerializer.newInstance() val objIn = ser.deserializeStream(in) val rdd = objIn.readObject().asInstanceOf[RDD[_]] val dep = objIn.readObject().asInstanceOf[ShuffleDependency[_,_]] From 8b3041c7233011c4a96fab045a86df91eae7b6f3 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Fri, 1 Feb 2013 15:38:42 -0800 Subject: [PATCH 260/291] Reduced the memory usage of reduce and similar operations These operations used to wait for all the results to be available in an array on the driver program before merging them. They now merge values incrementally as they arrive. 
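The gist of the change: instead of materializing every partition's result in a driver-side array and folding it afterwards, the job passes a resultHandler callback that merges each result as it arrives, so only the running total is retained. A simplified, self-contained sketch of that shape; runJobSketch is a made-up stand-in for the scheduler that walks partitions sequentially, whereas the real one delivers results asynchronously:

    object IncrementalMergeSketch {
      // Applies func to each partition and hands every result to resultHandler
      // immediately instead of buffering all results in an array.
      def runJobSketch[T, U](partitions: Seq[Iterator[T]])
                            (func: Iterator[T] => U)
                            (resultHandler: (Int, U) => Unit): Unit = {
        for ((iter, index) <- partitions.zipWithIndex) {
          resultHandler(index, func(iter))
        }
      }

      def main(args: Array[String]): Unit = {
        val partitions = Seq(Iterator(1, 2, 3), Iterator(4, 5), Iterator[Int]())

        // reduce-style merge: only the running total is ever held on the driver.
        var jobResult: Option[Int] = None
        runJobSketch(partitions)(it => if (it.hasNext) Some(it.reduceLeft(_ + _)) else None) {
          (_, taskResult) =>
            taskResult.foreach(v => jobResult = Some(jobResult.fold(v)(_ + v)))
        }
        println(jobResult.getOrElse(sys.error("empty collection")))   // prints 15
      }
    }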
--- .../main/scala/spark/PairRDDFunctions.scala | 4 +- core/src/main/scala/spark/RDD.scala | 41 ++++++++----- core/src/main/scala/spark/SparkContext.scala | 61 +++++++++++++++---- core/src/main/scala/spark/Utils.scala | 8 +++ .../partial/ApproximateActionListener.scala | 4 +- .../scala/spark/scheduler/DAGScheduler.scala | 15 +++-- .../scala/spark/scheduler/JobResult.scala | 2 +- .../scala/spark/scheduler/JobWaiter.scala | 14 +++-- core/src/test/scala/spark/RDDSuite.scala | 12 ++-- 9 files changed, 111 insertions(+), 50 deletions(-) diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala index 231e23a7de..cc3cca2571 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/spark/PairRDDFunctions.scala @@ -465,7 +465,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( val res = self.context.runJob(self, process _, Array(index), false) res(0) case None => - self.filter(_._1 == key).map(_._2).collect + self.filter(_._1 == key).map(_._2).collect() } } @@ -590,7 +590,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( var count = 0 while(iter.hasNext) { - val record = iter.next + val record = iter.next() count += 1 writer.write(record._1.asInstanceOf[AnyRef], record._2.asInstanceOf[AnyRef]) } diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 010e61dfdc..9d6ea782bd 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -389,16 +389,18 @@ abstract class RDD[T: ClassManifest]( None } } - val options = sc.runJob(this, reducePartition) - val results = new ArrayBuffer[T] - for (opt <- options; elem <- opt) { - results += elem - } - if (results.size == 0) { - throw new UnsupportedOperationException("empty collection") - } else { - return results.reduceLeft(cleanF) + var jobResult: Option[T] = None + val mergeResult = (index: Int, taskResult: Option[T]) => { + if (taskResult != None) { + jobResult = jobResult match { + case Some(value) => Some(f(value, taskResult.get)) + case None => taskResult + } + } } + sc.runJob(this, reducePartition, mergeResult) + // Get the final result out of our Option, or throw an exception if the RDD was empty + jobResult.getOrElse(throw new UnsupportedOperationException("empty collection")) } /** @@ -408,9 +410,13 @@ abstract class RDD[T: ClassManifest]( * modify t2. */ def fold(zeroValue: T)(op: (T, T) => T): T = { + // Clone the zero value since we will also be serializing it as part of tasks + var jobResult = Utils.clone(zeroValue, sc.env.closureSerializer.newInstance()) val cleanOp = sc.clean(op) - val results = sc.runJob(this, (iter: Iterator[T]) => iter.fold(zeroValue)(cleanOp)) - return results.fold(zeroValue)(cleanOp) + val foldPartition = (iter: Iterator[T]) => iter.fold(zeroValue)(cleanOp) + val mergeResult = (index: Int, taskResult: T) => jobResult = op(jobResult, taskResult) + sc.runJob(this, foldPartition, mergeResult) + jobResult } /** @@ -422,11 +428,14 @@ abstract class RDD[T: ClassManifest]( * allocation. 
*/ def aggregate[U: ClassManifest](zeroValue: U)(seqOp: (U, T) => U, combOp: (U, U) => U): U = { + // Clone the zero value since we will also be serializing it as part of tasks + var jobResult = Utils.clone(zeroValue, sc.env.closureSerializer.newInstance()) val cleanSeqOp = sc.clean(seqOp) val cleanCombOp = sc.clean(combOp) - val results = sc.runJob(this, - (iter: Iterator[T]) => iter.aggregate(zeroValue)(cleanSeqOp, cleanCombOp)) - return results.fold(zeroValue)(cleanCombOp) + val aggregatePartition = (it: Iterator[T]) => it.aggregate(zeroValue)(cleanSeqOp, cleanCombOp) + val mergeResult = (index: Int, taskResult: U) => jobResult = combOp(jobResult, taskResult) + sc.runJob(this, aggregatePartition, mergeResult) + jobResult } /** @@ -437,7 +446,7 @@ abstract class RDD[T: ClassManifest]( var result = 0L while (iter.hasNext) { result += 1L - iter.next + iter.next() } result }).sum @@ -452,7 +461,7 @@ abstract class RDD[T: ClassManifest]( var result = 0L while (iter.hasNext) { result += 1L - iter.next + iter.next() } result } diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index b0d4b58240..ddbf8f95d9 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -543,10 +543,30 @@ class SparkContext( } /** - * Run a function on a given set of partitions in an RDD and return the results. This is the main - * entry point to the scheduler, by which all actions get launched. The allowLocal flag specifies - * whether the scheduler can run the computation on the driver rather than shipping it out to the - * cluster, for short actions like first(). + * Run a function on a given set of partitions in an RDD and pass the results to the given + * handler function. This is the main entry point for all actions in Spark. The allowLocal + * flag specifies whether the scheduler can run the computation on the driver rather than + * shipping it out to the cluster, for short actions like first(). + */ + def runJob[T, U: ClassManifest]( + rdd: RDD[T], + func: (TaskContext, Iterator[T]) => U, + partitions: Seq[Int], + allowLocal: Boolean, + resultHandler: (Int, U) => Unit) { + val callSite = Utils.getSparkCallSite + logInfo("Starting job: " + callSite) + val start = System.nanoTime + val result = dagScheduler.runJob(rdd, func, partitions, callSite, allowLocal, resultHandler) + logInfo("Job finished: " + callSite + ", took " + (System.nanoTime - start) / 1e9 + " s") + rdd.doCheckpoint() + result + } + + /** + * Run a function on a given set of partitions in an RDD and return the results as an array. The + * allowLocal flag specifies whether the scheduler can run the computation on the driver rather + * than shipping it out to the cluster, for short actions like first(). 
*/ def runJob[T, U: ClassManifest]( rdd: RDD[T], @@ -554,13 +574,9 @@ class SparkContext( partitions: Seq[Int], allowLocal: Boolean ): Array[U] = { - val callSite = Utils.getSparkCallSite - logInfo("Starting job: " + callSite) - val start = System.nanoTime - val result = dagScheduler.runJob(rdd, func, partitions, callSite, allowLocal) - logInfo("Job finished: " + callSite + ", took " + (System.nanoTime - start) / 1e9 + " s") - rdd.doCheckpoint() - result + val results = new Array[U](partitions.size) + runJob[T, U](rdd, func, partitions, allowLocal, (index, res) => results(index) = res) + results } /** @@ -590,6 +606,29 @@ class SparkContext( runJob(rdd, func, 0 until rdd.splits.size, false) } + /** + * Run a job on all partitions in an RDD and pass the results to a handler function. + */ + def runJob[T, U: ClassManifest]( + rdd: RDD[T], + processPartition: (TaskContext, Iterator[T]) => U, + resultHandler: (Int, U) => Unit) + { + runJob[T, U](rdd, processPartition, 0 until rdd.splits.size, false, resultHandler) + } + + /** + * Run a job on all partitions in an RDD and pass the results to a handler function. + */ + def runJob[T, U: ClassManifest]( + rdd: RDD[T], + processPartition: Iterator[T] => U, + resultHandler: (Int, U) => Unit) + { + val processFunc = (context: TaskContext, iter: Iterator[T]) => processPartition(iter) + runJob[T, U](rdd, processFunc, 0 until rdd.splits.size, false, resultHandler) + } + /** * Run a job that can return approximate results. */ diff --git a/core/src/main/scala/spark/Utils.scala b/core/src/main/scala/spark/Utils.scala index 1e58d01273..28d643abca 100644 --- a/core/src/main/scala/spark/Utils.scala +++ b/core/src/main/scala/spark/Utils.scala @@ -12,6 +12,7 @@ import scala.io.Source import com.google.common.io.Files import com.google.common.util.concurrent.ThreadFactoryBuilder import scala.Some +import spark.serializer.SerializerInstance /** * Various utility methods used by Spark. @@ -446,4 +447,11 @@ private object Utils extends Logging { socket.close() portBound } + + /** + * Clone an object using a Spark serializer. + */ + def clone[T](value: T, serializer: SerializerInstance): T = { + serializer.deserialize[T](serializer.serialize(value)) + } } diff --git a/core/src/main/scala/spark/partial/ApproximateActionListener.scala b/core/src/main/scala/spark/partial/ApproximateActionListener.scala index 42f46e06ed..24b4909380 100644 --- a/core/src/main/scala/spark/partial/ApproximateActionListener.scala +++ b/core/src/main/scala/spark/partial/ApproximateActionListener.scala @@ -32,7 +32,7 @@ private[spark] class ApproximateActionListener[T, U, R]( if (finishedTasks == totalTasks) { // If we had already returned a PartialResult, set its final value resultObject.foreach(r => r.setFinalValue(evaluator.currentResult())) - // Notify any waiting thread that may have called getResult + // Notify any waiting thread that may have called awaitResult this.notifyAll() } } @@ -49,7 +49,7 @@ private[spark] class ApproximateActionListener[T, U, R]( * Waits for up to timeout milliseconds since the listener was created and then returns a * PartialResult with the result so far. This may be complete if the whole job is done. 
*/ - def getResult(): PartialResult[R] = synchronized { + def awaitResult(): PartialResult[R] = synchronized { val finishTime = startTime + timeout while (true) { val time = System.currentTimeMillis() diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index 14f61f7e87..908a22b2df 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -203,18 +203,17 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with func: (TaskContext, Iterator[T]) => U, partitions: Seq[Int], callSite: String, - allowLocal: Boolean) - : Array[U] = + allowLocal: Boolean, + resultHandler: (Int, U) => Unit) { if (partitions.size == 0) { - return new Array[U](0) + return } - val waiter = new JobWaiter(partitions.size) + val waiter = new JobWaiter(partitions.size, resultHandler) val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] eventQueue.put(JobSubmitted(finalRdd, func2, partitions.toArray, allowLocal, callSite, waiter)) - waiter.getResult() match { - case JobSucceeded(results: Seq[_]) => - return results.asInstanceOf[Seq[U]].toArray + waiter.awaitResult() match { + case JobSucceeded => {} case JobFailed(exception: Exception) => logInfo("Failed to run " + callSite) throw exception @@ -233,7 +232,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] val partitions = (0 until rdd.splits.size).toArray eventQueue.put(JobSubmitted(rdd, func2, partitions, false, callSite, listener)) - return listener.getResult() // Will throw an exception if the job fails + return listener.awaitResult() // Will throw an exception if the job fails } /** diff --git a/core/src/main/scala/spark/scheduler/JobResult.scala b/core/src/main/scala/spark/scheduler/JobResult.scala index c4a74e526f..654131ee84 100644 --- a/core/src/main/scala/spark/scheduler/JobResult.scala +++ b/core/src/main/scala/spark/scheduler/JobResult.scala @@ -5,5 +5,5 @@ package spark.scheduler */ private[spark] sealed trait JobResult -private[spark] case class JobSucceeded(results: Seq[_]) extends JobResult +private[spark] case object JobSucceeded extends JobResult private[spark] case class JobFailed(exception: Exception) extends JobResult diff --git a/core/src/main/scala/spark/scheduler/JobWaiter.scala b/core/src/main/scala/spark/scheduler/JobWaiter.scala index b3d4feebe5..3cc6a86345 100644 --- a/core/src/main/scala/spark/scheduler/JobWaiter.scala +++ b/core/src/main/scala/spark/scheduler/JobWaiter.scala @@ -3,10 +3,12 @@ package spark.scheduler import scala.collection.mutable.ArrayBuffer /** - * An object that waits for a DAGScheduler job to complete. + * An object that waits for a DAGScheduler job to complete. As tasks finish, it passes their + * results to the given handler function. */ -private[spark] class JobWaiter(totalTasks: Int) extends JobListener { - private val taskResults = ArrayBuffer.fill[Any](totalTasks)(null) +private[spark] class JobWaiter[T](totalTasks: Int, resultHandler: (Int, T) => Unit) + extends JobListener { + private var finishedTasks = 0 private var jobFinished = false // Is the job as a whole finished (succeeded or failed)? 
@@ -17,11 +19,11 @@ private[spark] class JobWaiter(totalTasks: Int) extends JobListener { if (jobFinished) { throw new UnsupportedOperationException("taskSucceeded() called on a finished JobWaiter") } - taskResults(index) = result + resultHandler(index, result.asInstanceOf[T]) finishedTasks += 1 if (finishedTasks == totalTasks) { jobFinished = true - jobResult = JobSucceeded(taskResults) + jobResult = JobSucceeded this.notifyAll() } } @@ -38,7 +40,7 @@ private[spark] class JobWaiter(totalTasks: Int) extends JobListener { } } - def getResult(): JobResult = synchronized { + def awaitResult(): JobResult = synchronized { while (!jobFinished) { this.wait() } diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index ed03e65153..95d2e62730 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -12,9 +12,9 @@ class RDDSuite extends FunSuite with LocalSparkContext { val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) assert(nums.collect().toList === List(1, 2, 3, 4)) val dups = sc.makeRDD(Array(1, 1, 2, 2, 3, 3, 4, 4), 2) - assert(dups.distinct.count === 4) - assert(dups.distinct().collect === dups.distinct.collect) - assert(dups.distinct(2).collect === dups.distinct.collect) + assert(dups.distinct().count === 4) + assert(dups.distinct().collect === dups.distinct().collect) + assert(dups.distinct(2).collect === dups.distinct().collect) assert(nums.reduce(_ + _) === 10) assert(nums.fold(0)(_ + _) === 10) assert(nums.map(_.toString).collect().toList === List("1", "2", "3", "4")) @@ -31,6 +31,10 @@ class RDDSuite extends FunSuite with LocalSparkContext { case(split, iter) => Iterator((split, iter.reduceLeft(_ + _))) } assert(partitionSumsWithSplit.collect().toList === List((0, 3), (1, 7))) + + intercept[UnsupportedOperationException] { + nums.filter(_ > 5).reduce(_ + _) + } } test("SparkContext.union") { @@ -164,7 +168,7 @@ class RDDSuite extends FunSuite with LocalSparkContext { // Note that split number starts from 0, so > 8 means only 10th partition left. val prunedRdd = new PartitionPruningRDD(data, splitNum => splitNum > 8) assert(prunedRdd.splits.size === 1) - val prunedData = prunedRdd.collect + val prunedData = prunedRdd.collect() assert(prunedData.size === 1) assert(prunedData(0) === 10) } From 12c1eb47568060efac57d6df7df7e5704a8d3fab Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Fri, 1 Feb 2013 21:21:44 -0600 Subject: [PATCH 261/291] Reduce the amount of duplicate logging Akka does to stdout. Given we have Akka logging go through SLF4j to log4j, we don't need all the extra noise of Akka's stdout logger that is supposedly only used during Akka init time but seems to continue logging lots of noisy network events that we either don't care about or are in the log4j logs anyway. 
See: http://doc.akka.io/docs/akka/2.0/general/configuration.html # Log level for the very basic logger activated during AkkaApplication startup # Options: ERROR, WARNING, INFO, DEBUG # stdout-loglevel = "WARNING" --- core/src/main/scala/spark/util/AkkaUtils.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/main/scala/spark/util/AkkaUtils.scala b/core/src/main/scala/spark/util/AkkaUtils.scala index e0fdeffbc4..e43fbd6b1c 100644 --- a/core/src/main/scala/spark/util/AkkaUtils.scala +++ b/core/src/main/scala/spark/util/AkkaUtils.scala @@ -30,6 +30,7 @@ private[spark] object AkkaUtils { val akkaConf = ConfigFactory.parseString(""" akka.daemonic = on akka.event-handlers = ["akka.event.slf4j.Slf4jEventHandler"] + akka.stdout-loglevel = "ERROR" akka.actor.provider = "akka.remote.RemoteActorRefProvider" akka.remote.transport = "akka.remote.netty.NettyRemoteTransport" akka.remote.log-remote-lifecycle-events = on From ae26911ec0d768dcdae8b7d706ca4544e36535e6 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Fri, 1 Feb 2013 21:07:24 -0800 Subject: [PATCH 262/291] Add back test for distinct without parens --- core/src/test/scala/spark/RDDSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 95d2e62730..89a3687386 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -12,7 +12,8 @@ class RDDSuite extends FunSuite with LocalSparkContext { val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) assert(nums.collect().toList === List(1, 2, 3, 4)) val dups = sc.makeRDD(Array(1, 1, 2, 2, 3, 3, 4, 4), 2) - assert(dups.distinct().count === 4) + assert(dups.distinct().count() === 4) + assert(dups.distinct.count === 4) // Can distinct and count be called without parentheses? assert(dups.distinct().collect === dups.distinct().collect) assert(dups.distinct(2).collect === dups.distinct().collect) assert(nums.reduce(_ + _) === 10) From 1fd5ee323d127499bb3f173d4142c37532ec29b2 Mon Sep 17 00:00:00 2001 From: Charles Reiss Date: Fri, 1 Feb 2013 22:33:38 -0800 Subject: [PATCH 263/291] Code review changes: add sc.stop; style of multiline comments; parens on procedure calls. --- .../spark/scheduler/DAGSchedulerSuite.scala | 69 +++++++++++++------ 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala index c31e2e7064..adce1f38bb 100644 --- a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala @@ -31,7 +31,7 @@ import spark.TaskEndReason import spark.{FetchFailed, Success} /** - * Tests for DAGScheduler. These tests directly call the event processing functinos in DAGScheduler + * Tests for DAGScheduler. These tests directly call the event processing functions in DAGScheduler * rather than spawning an event loop thread as happens in the real code. They use EasyMock * to mock out two classes that DAGScheduler interacts with: TaskScheduler (to which TaskSets are * submitted) and BlockManagerMaster (from which cache locations are retrieved and to which dead @@ -56,29 +56,34 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar var schedulerThread: Thread = null var schedulerException: Throwable = null - /** Set of EasyMock argument matchers that match a TaskSet for a given RDD. 
+ /** + * Set of EasyMock argument matchers that match a TaskSet for a given RDD. * We cache these so we do not create duplicate matchers for the same RDD. * This allows us to easily setup a sequence of expectations for task sets for * that RDD. */ val taskSetMatchers = new HashMap[MyRDD, IArgumentMatcher] - /** Set of cache locations to return from our mock BlockManagerMaster. + /** + * Set of cache locations to return from our mock BlockManagerMaster. * Keys are (rdd ID, partition ID). Anything not present will return an empty * list of cache locations silently. */ val cacheLocations = new HashMap[(Int, Int), Seq[BlockManagerId]] - /** JobWaiter for the last JobSubmitted event we pushed. To keep tests (most of which + /** + * JobWaiter for the last JobSubmitted event we pushed. To keep tests (most of which * will only submit one job) from needing to explicitly track it. */ var lastJobWaiter: JobWaiter = null - /** Tell EasyMockSugar what mock objects we want to be configured by expecting {...} + /** + * Tell EasyMockSugar what mock objects we want to be configured by expecting {...} * and whenExecuting {...} */ implicit val mocks = MockObjects(taskScheduler, blockManagerMaster) - /** Utility function to reset mocks and set expectations on them. EasyMock wants mock objects + /** + * Utility function to reset mocks and set expectations on them. EasyMock wants mock objects * to be reset after each time their expectations are set, and we tend to check mock object * calls over a single call to DAGScheduler. * @@ -115,17 +120,21 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar whenExecuting { scheduler.stop() } + sc.stop() System.clearProperty("spark.master.port") } def makeBlockManagerId(host: String): BlockManagerId = BlockManagerId("exec-" + host, host, 12345) - /** Type of RDD we use for testing. Note that we should never call the real RDD compute methods. - * This is a pair RDD type so it can always be used in ShuffleDependencies. */ + /** + * Type of RDD we use for testing. Note that we should never call the real RDD compute methods. + * This is a pair RDD type so it can always be used in ShuffleDependencies. + */ type MyRDD = RDD[(Int, Int)] - /** Create an RDD for passing to DAGScheduler. These RDDs will use the dependencies and + /** + * Create an RDD for passing to DAGScheduler. These RDDs will use the dependencies and * preferredLocations (if any) that are passed to them. They are deliberately not executable * so we can test that DAGScheduler does not try to execute RDDs locally. */ @@ -150,7 +159,8 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar } } - /** EasyMock matcher method. For use as an argument matcher for a TaskSet whose first task + /** + * EasyMock matcher method. For use as an argument matcher for a TaskSet whose first task * is from a particular RDD. */ def taskSetForRdd(rdd: MyRDD): TaskSet = { @@ -172,7 +182,8 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar return null } - /** Setup an EasyMock expectation to repsond to blockManagerMaster.getLocations() called from + /** + * Setup an EasyMock expectation to repsond to blockManagerMaster.getLocations() called from * cacheLocations. 
*/ def expectGetLocations(): Unit = { @@ -197,7 +208,8 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar }).anyTimes() } - /** Process the supplied event as if it were the top of the DAGScheduler event queue, expecting + /** + * Process the supplied event as if it were the top of the DAGScheduler event queue, expecting * the scheduler not to exit. * * After processing the event, submit waiting stages as is done on most iterations of the @@ -208,7 +220,8 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar scheduler.submitWaitingStages() } - /** Expect a TaskSet for the specified RDD to be submitted to the TaskScheduler. Should be + /** + * Expect a TaskSet for the specified RDD to be submitted to the TaskScheduler. Should be * called from a resetExpecting { ... } block. * * Returns a easymock Capture that will contain the task set after the stage is submitted. @@ -220,7 +233,8 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar return taskSetCapture } - /** Expect the supplied code snippet to submit a stage for the specified RDD. + /** + * Expect the supplied code snippet to submit a stage for the specified RDD. * Return the resulting TaskSet. First marks all the tasks are belonging to the * current MapOutputTracker generation. */ @@ -239,7 +253,9 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar return taskSet } - /** Send the given CompletionEvent messages for the tasks in the TaskSet. */ + /** + * Send the given CompletionEvent messages for the tasks in the TaskSet. + */ def respondToTaskSet(taskSet: TaskSet, results: Seq[(TaskEndReason, Any)]) { assert(taskSet.tasks.size >= results.size) for ((result, i) <- results.zipWithIndex) { @@ -249,7 +265,9 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar } } - /** Assert that the supplied TaskSet has exactly the given preferredLocations. */ + /** + * Assert that the supplied TaskSet has exactly the given preferredLocations. + */ def expectTaskSetLocations(taskSet: TaskSet, locations: Seq[Seq[String]]) { assert(locations.size === taskSet.tasks.size) for ((expectLocs, taskLocs) <- @@ -258,7 +276,8 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar } } - /** When we submit dummy Jobs, this is the compute function we supply. Except in a local test + /** + * When we submit dummy Jobs, this is the compute function we supply. Except in a local test * below, we do not expect this function to ever be executed; instead, we will return results * directly through CompletionEvents. */ @@ -266,8 +285,10 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar it.next._1.asInstanceOf[Int] - /** Start a job to compute the given RDD. Returns the JobWaiter that will - * collect the result of the job via callbacks from DAGScheduler. */ + /** + * Start a job to compute the given RDD. Returns the JobWaiter that will + * collect the result of the job via callbacks from DAGScheduler. + */ def submitRdd(rdd: MyRDD, allowLocal: Boolean = false): JobWaiter = { val (toSubmit, waiter) = scheduler.prepareJob[(Int, Int), Int]( rdd, @@ -281,7 +302,9 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar return waiter } - /** Assert that a job we started has failed. */ + /** + * Assert that a job we started has failed. 
+ */ def expectJobException(waiter: JobWaiter = lastJobWaiter) { waiter.getResult match { case JobSucceeded(_) => fail() @@ -289,7 +312,9 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar } } - /** Assert that a job we started has succeeded and has the given result. */ + /** + * Assert that a job we started has succeeded and has the given result. + */ def expectJobResult(expected: Array[Int], waiter: JobWaiter = lastJobWaiter) { waiter.getResult match { case JobSucceeded(answer) => @@ -500,7 +525,7 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with EasyMockSugar )) } val recomputeOne = interceptStage(shuffleOneRdd) { - scheduler.resubmitFailedStages + scheduler.resubmitFailedStages() } val recomputeTwo = interceptStage(shuffleTwoRdd) { respondToTaskSet(recomputeOne, List( From 28e0cb9f312b7fb1b0236fd15ba0dd2f423e826d Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Sat, 2 Feb 2013 01:11:37 -0600 Subject: [PATCH 264/291] Fix createActorSystem not actually using the systemName parameter. This meant all system names were "spark", which worked, but didn't lead to the most intuitive log output. This fixes createActorSystem to use the passed system name, and refactors Master/Worker to encapsulate their system/actor names instead of having the clients guess at them. Note that the driver system name, "spark", is left as is, and is still repeated a few times, but that seems like a separate issue. --- .../spark/deploy/LocalSparkCluster.scala | 38 +++++------- .../scala/spark/deploy/client/Client.scala | 13 +---- .../scala/spark/deploy/master/Master.scala | 24 +++++++- .../scala/spark/deploy/worker/Worker.scala | 58 +++++++++---------- .../spark/storage/BlockManagerMaster.scala | 2 - .../src/main/scala/spark/util/AkkaUtils.scala | 6 +- 6 files changed, 68 insertions(+), 73 deletions(-) diff --git a/core/src/main/scala/spark/deploy/LocalSparkCluster.scala b/core/src/main/scala/spark/deploy/LocalSparkCluster.scala index 2836574ecb..22319a96ca 100644 --- a/core/src/main/scala/spark/deploy/LocalSparkCluster.scala +++ b/core/src/main/scala/spark/deploy/LocalSparkCluster.scala @@ -18,35 +18,23 @@ import scala.collection.mutable.ArrayBuffer private[spark] class LocalSparkCluster(numWorkers: Int, coresPerWorker: Int, memoryPerWorker: Int) extends Logging { - val localIpAddress = Utils.localIpAddress + private val localIpAddress = Utils.localIpAddress + private val masterActorSystems = ArrayBuffer[ActorSystem]() + private val workerActorSystems = ArrayBuffer[ActorSystem]() - var masterActor : ActorRef = _ - var masterActorSystem : ActorSystem = _ - var masterPort : Int = _ - var masterUrl : String = _ - - val workerActorSystems = ArrayBuffer[ActorSystem]() - val workerActors = ArrayBuffer[ActorRef]() - - def start() : String = { + def start(): String = { logInfo("Starting a local Spark cluster with " + numWorkers + " workers.") /* Start the Master */ - val (actorSystem, masterPort) = AkkaUtils.createActorSystem("sparkMaster", localIpAddress, 0) - masterActorSystem = actorSystem - masterUrl = "spark://" + localIpAddress + ":" + masterPort - masterActor = masterActorSystem.actorOf( - Props(new Master(localIpAddress, masterPort, 0)), name = "Master") + val (masterSystem, masterPort) = Master.startSystemAndActor(localIpAddress, 0, 0) + masterActorSystems += masterSystem + val masterUrl = "spark://" + localIpAddress + ":" + masterPort - /* Start the Slaves */ + /* Start the Workers */ for (workerNum <- 1 to numWorkers) { - val (actorSystem, boundPort) = - 
AkkaUtils.createActorSystem("sparkWorker" + workerNum, localIpAddress, 0) - workerActorSystems += actorSystem - val actor = actorSystem.actorOf( - Props(new Worker(localIpAddress, boundPort, 0, coresPerWorker, memoryPerWorker, masterUrl)), - name = "Worker") - workerActors += actor + val (workerSystem, _) = Worker.startSystemAndActor(localIpAddress, 0, 0, coresPerWorker, + memoryPerWorker, masterUrl, null, Some(workerNum)) + workerActorSystems += workerSystem } return masterUrl @@ -57,7 +45,7 @@ class LocalSparkCluster(numWorkers: Int, coresPerWorker: Int, memoryPerWorker: I // Stop the workers before the master so they don't get upset that it disconnected workerActorSystems.foreach(_.shutdown()) workerActorSystems.foreach(_.awaitTermination()) - masterActorSystem.shutdown() - masterActorSystem.awaitTermination() + masterActorSystems.foreach(_.shutdown()) + masterActorSystems.foreach(_.awaitTermination()) } } diff --git a/core/src/main/scala/spark/deploy/client/Client.scala b/core/src/main/scala/spark/deploy/client/Client.scala index 90fe9508cd..a63eee1233 100644 --- a/core/src/main/scala/spark/deploy/client/Client.scala +++ b/core/src/main/scala/spark/deploy/client/Client.scala @@ -9,6 +9,7 @@ import spark.{SparkException, Logging} import akka.remote.RemoteClientLifeCycleEvent import akka.remote.RemoteClientShutdown import spark.deploy.RegisterJob +import spark.deploy.master.Master import akka.remote.RemoteClientDisconnected import akka.actor.Terminated import akka.dispatch.Await @@ -24,26 +25,18 @@ private[spark] class Client( listener: ClientListener) extends Logging { - val MASTER_REGEX = "spark://([^:]+):([0-9]+)".r - var actor: ActorRef = null var jobId: String = null - if (MASTER_REGEX.unapplySeq(masterUrl) == None) { - throw new SparkException("Invalid master URL: " + masterUrl) - } - class ClientActor extends Actor with Logging { var master: ActorRef = null var masterAddress: Address = null var alreadyDisconnected = false // To avoid calling listener.disconnected() multiple times override def preStart() { - val Seq(masterHost, masterPort) = MASTER_REGEX.unapplySeq(masterUrl).get - logInfo("Connecting to master spark://" + masterHost + ":" + masterPort) - val akkaUrl = "akka://spark@%s:%s/user/Master".format(masterHost, masterPort) + logInfo("Connecting to master " + masterUrl) try { - master = context.actorFor(akkaUrl) + master = context.actorFor(Master.toAkkaUrl(masterUrl)) masterAddress = master.path.address master ! 
RegisterJob(jobDescription) context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) diff --git a/core/src/main/scala/spark/deploy/master/Master.scala b/core/src/main/scala/spark/deploy/master/Master.scala index c618e87cdd..92e7914b1b 100644 --- a/core/src/main/scala/spark/deploy/master/Master.scala +++ b/core/src/main/scala/spark/deploy/master/Master.scala @@ -262,11 +262,29 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor } private[spark] object Master { + private val systemName = "sparkMaster" + private val actorName = "Master" + private val sparkUrlRegex = "spark://([^:]+):([0-9]+)".r + def main(argStrings: Array[String]) { val args = new MasterArguments(argStrings) - val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", args.ip, args.port) - val actor = actorSystem.actorOf( - Props(new Master(args.ip, boundPort, args.webUiPort)), name = "Master") + val (actorSystem, _) = startSystemAndActor(args.ip, args.port, args.webUiPort) actorSystem.awaitTermination() } + + /** Returns an `akka://...` URL for the Master actor given a sparkUrl `spark://host:ip`. */ + def toAkkaUrl(sparkUrl: String): String = { + sparkUrl match { + case sparkUrlRegex(host, port) => + "akka://%s@%s:%s/user/%s".format(systemName, host, port, actorName) + case _ => + throw new SparkException("Invalid master URL: " + sparkUrl) + } + } + + def startSystemAndActor(host: String, port: Int, webUiPort: Int): (ActorSystem, Int) = { + val (actorSystem, boundPort) = AkkaUtils.createActorSystem(systemName, host, port) + val actor = actorSystem.actorOf(Props(new Master(host, boundPort, webUiPort)), name = actorName) + (actorSystem, boundPort) + } } diff --git a/core/src/main/scala/spark/deploy/worker/Worker.scala b/core/src/main/scala/spark/deploy/worker/Worker.scala index 8b41620d98..2219dd6262 100644 --- a/core/src/main/scala/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/spark/deploy/worker/Worker.scala @@ -1,7 +1,7 @@ package spark.deploy.worker import scala.collection.mutable.{ArrayBuffer, HashMap} -import akka.actor.{ActorRef, Props, Actor} +import akka.actor.{ActorRef, Props, Actor, ActorSystem} import spark.{Logging, Utils} import spark.util.AkkaUtils import spark.deploy._ @@ -13,6 +13,7 @@ import akka.remote.RemoteClientDisconnected import spark.deploy.RegisterWorker import spark.deploy.LaunchExecutor import spark.deploy.RegisterWorkerFailed +import spark.deploy.master.Master import akka.actor.Terminated import java.io.File @@ -27,7 +28,6 @@ private[spark] class Worker( extends Actor with Logging { val DATE_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss") // For worker and executor IDs - val MASTER_REGEX = "spark://([^:]+):([0-9]+)".r var master: ActorRef = null var masterWebUiUrl : String = "" @@ -48,11 +48,7 @@ private[spark] class Worker( def memoryFree: Int = memory - memoryUsed def createWorkDir() { - workDir = if (workDirPath != null) { - new File(workDirPath) - } else { - new File(sparkHome, "work") - } + workDir = Option(workDirPath).map(new File(_)).getOrElse(new File(sparkHome, "work")) try { if (!workDir.exists() && !workDir.mkdirs()) { logError("Failed to create work directory " + workDir) @@ -68,8 +64,7 @@ private[spark] class Worker( override def preStart() { logInfo("Starting Spark worker %s:%d with %d cores, %s RAM".format( ip, port, cores, Utils.memoryMegabytesToString(memory))) - val envVar = System.getenv("SPARK_HOME") - sparkHome = new File(if (envVar == null) "." 
else envVar) + sparkHome = new File(Option(System.getenv("SPARK_HOME")).getOrElse(".")) logInfo("Spark home: " + sparkHome) createWorkDir() connectToMaster() @@ -77,24 +72,15 @@ private[spark] class Worker( } def connectToMaster() { - masterUrl match { - case MASTER_REGEX(masterHost, masterPort) => { - logInfo("Connecting to master spark://" + masterHost + ":" + masterPort) - val akkaUrl = "akka://spark@%s:%s/user/Master".format(masterHost, masterPort) - try { - master = context.actorFor(akkaUrl) - master ! RegisterWorker(workerId, ip, port, cores, memory, webUiPort, publicAddress) - context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) - context.watch(master) // Doesn't work with remote actors, but useful for testing - } catch { - case e: Exception => - logError("Failed to connect to master", e) - System.exit(1) - } - } - - case _ => - logError("Invalid master URL: " + masterUrl) + logInfo("Connecting to master " + masterUrl) + try { + master = context.actorFor(Master.toAkkaUrl(masterUrl)) + master ! RegisterWorker(workerId, ip, port, cores, memory, webUiPort, publicAddress) + context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) + context.watch(master) // Doesn't work with remote actors, but useful for testing + } catch { + case e: Exception => + logError("Failed to connect to master", e) System.exit(1) } } @@ -183,11 +169,19 @@ private[spark] class Worker( private[spark] object Worker { def main(argStrings: Array[String]) { val args = new WorkerArguments(argStrings) - val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", args.ip, args.port) - val actor = actorSystem.actorOf( - Props(new Worker(args.ip, boundPort, args.webUiPort, args.cores, args.memory, - args.master, args.workDir)), - name = "Worker") + val (actorSystem, _) = startSystemAndActor(args.ip, args.port, args.webUiPort, args.cores, + args.memory, args.master, args.workDir) actorSystem.awaitTermination() } + + def startSystemAndActor(host: String, port: Int, webUiPort: Int, cores: Int, memory: Int, + masterUrl: String, workDir: String, workerNumber: Option[Int] = None): (ActorSystem, Int) = { + // The LocalSparkCluster runs multiple local sparkWorkerX actor systems + val systemName = "sparkWorker" + workerNumber.map(_.toString).getOrElse("") + val (actorSystem, boundPort) = AkkaUtils.createActorSystem(systemName, host, port) + val actor = actorSystem.actorOf(Props(new Worker(host, boundPort, webUiPort, cores, memory, + masterUrl, workDir)), name = "Worker") + (actorSystem, boundPort) + } + } diff --git a/core/src/main/scala/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/spark/storage/BlockManagerMaster.scala index 36398095a2..7be6b9fa87 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/spark/storage/BlockManagerMaster.scala @@ -27,8 +27,6 @@ private[spark] class BlockManagerMaster( val AKKA_RETRY_INTERVAL_MS: Int = System.getProperty("spark.akka.retry.wait", "3000").toInt val DRIVER_AKKA_ACTOR_NAME = "BlockMasterManager" - val SLAVE_AKKA_ACTOR_NAME = "BlockSlaveManager" - val DEFAULT_MANAGER_IP: String = Utils.localHostName() val timeout = 10.seconds var driverActor: ActorRef = { diff --git a/core/src/main/scala/spark/util/AkkaUtils.scala b/core/src/main/scala/spark/util/AkkaUtils.scala index e0fdeffbc4..3a3626e8a0 100644 --- a/core/src/main/scala/spark/util/AkkaUtils.scala +++ b/core/src/main/scala/spark/util/AkkaUtils.scala @@ -18,9 +18,13 @@ import java.util.concurrent.TimeoutException * Various 
utility classes for working with Akka. */ private[spark] object AkkaUtils { + /** * Creates an ActorSystem ready for remoting, with various Spark features. Returns both the * ActorSystem itself and its port (which is hard to get from Akka). + * + * Note: the `name` parameter is important, as even if a client sends a message to right + * host + port, if the system name is incorrect, Akka will drop the message. */ def createActorSystem(name: String, host: String, port: Int): (ActorSystem, Int) = { val akkaThreads = System.getProperty("spark.akka.threads", "4").toInt @@ -41,7 +45,7 @@ private[spark] object AkkaUtils { akka.actor.default-dispatcher.throughput = %d """.format(host, port, akkaTimeout, akkaFrameSize, akkaThreads, akkaBatchSize)) - val actorSystem = ActorSystem("spark", akkaConf, getClass.getClassLoader) + val actorSystem = ActorSystem(name, akkaConf, getClass.getClassLoader) // Figure out the port number we bound to, in case port was passed as 0. This is a bit of a // hack because Akka doesn't let you figure out the port through the public API yet. From 696eec32c982ca516c506de33f383a173bcbd131 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Sat, 2 Feb 2013 02:03:26 -0600 Subject: [PATCH 265/291] Move executorMemory up into SchedulerBackend. --- .../spark/scheduler/cluster/SchedulerBackend.scala | 12 ++++++++++++ .../cluster/SparkDeploySchedulerBackend.scala | 9 --------- .../mesos/CoarseMesosSchedulerBackend.scala | 10 ---------- .../scheduler/mesos/MesosSchedulerBackend.scala | 10 ---------- 4 files changed, 12 insertions(+), 29 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/cluster/SchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/SchedulerBackend.scala index ddcd64d7c6..9ac875de3a 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/SchedulerBackend.scala @@ -1,5 +1,7 @@ package spark.scheduler.cluster +import spark.Utils + /** * A backend interface for cluster scheduling systems that allows plugging in different ones under * ClusterScheduler. 
We assume a Mesos-like model where the application gets resource offers as @@ -11,5 +13,15 @@ private[spark] trait SchedulerBackend { def reviveOffers(): Unit def defaultParallelism(): Int + // Memory used by each executor (in megabytes) + protected val executorMemory = { + // TODO: Might need to add some extra memory for the non-heap parts of the JVM + Option(System.getProperty("spark.executor.memory")) + .orElse(Option(System.getenv("SPARK_MEM"))) + .map(Utils.memoryStringToMb) + .getOrElse(512) + } + + // TODO: Probably want to add a killTask too } diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index 2f7099c5b9..59ff8bcb90 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -20,15 +20,6 @@ private[spark] class SparkDeploySchedulerBackend( val maxCores = System.getProperty("spark.cores.max", Int.MaxValue.toString).toInt - // Memory used by each executor (in megabytes) - val executorMemory = { - // TODO: Might need to add some extra memory for the non-heap parts of the JVM - Option(System.getProperty("spark.executor.memory")) - .orElse(Option(System.getenv("SPARK_MEM"))) - .map(Utils.memoryStringToMb) - .getOrElse(512) - } - override def start() { super.start() diff --git a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala index 7bf56a05d6..b481ec0a72 100644 --- a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala @@ -35,16 +35,6 @@ private[spark] class CoarseMesosSchedulerBackend( val MAX_SLAVE_FAILURES = 2 // Blacklist a slave after this many failures - // Memory used by each executor (in megabytes) - val executorMemory = { - if (System.getenv("SPARK_MEM") != null) { - Utils.memoryStringToMb(System.getenv("SPARK_MEM")) - // TODO: Might need to add some extra memory for the non-heap parts of the JVM - } else { - 512 - } - } - // Lock used to wait for scheduler to be registered var isRegistered = false val registeredLock = new Object() diff --git a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala index eab1c60e0b..5c8b531de3 100644 --- a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala @@ -29,16 +29,6 @@ private[spark] class MesosSchedulerBackend( with MScheduler with Logging { - // Memory used by each executor (in megabytes) - val EXECUTOR_MEMORY = { - if (System.getenv("SPARK_MEM") != null) { - Utils.memoryStringToMb(System.getenv("SPARK_MEM")) - // TODO: Might need to add some extra memory for the non-heap parts of the JVM - } else { - 512 - } - } - // Lock used to wait for scheduler to be registered var isRegistered = false val registeredLock = new Object() From cae8a6795c7f454b74c8d3c4425a6ced151d6d9b Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Sat, 2 Feb 2013 02:15:39 -0600 Subject: [PATCH 266/291] Fix dangling old variable names. 
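For reference, the resolution order that patch 265 hoists into SchedulerBackend (and whose callers this follow-up renames) is: the spark.executor.memory system property, then the SPARK_MEM environment variable, then a 512 MB default. A minimal standalone sketch, with the megabyte parser passed in rather than assuming Utils' exact signature:

    // Sketch of the executor-memory resolution shown in the diff above.
    // `memoryStringToMb` stands in for Utils.memoryStringToMb.
    def resolveExecutorMemoryMb(memoryStringToMb: String => Int): Int =
      Option(System.getProperty("spark.executor.memory"))
        .orElse(Option(System.getenv("SPARK_MEM")))
        .map(memoryStringToMb)
        .getOrElse(512)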
--- .../scala/spark/scheduler/mesos/MesosSchedulerBackend.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala index 5c8b531de3..300766d0f5 100644 --- a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala @@ -79,7 +79,7 @@ private[spark] class MesosSchedulerBackend( val memory = Resource.newBuilder() .setName("mem") .setType(Value.Type.SCALAR) - .setScalar(Value.Scalar.newBuilder().setValue(EXECUTOR_MEMORY).build()) + .setScalar(Value.Scalar.newBuilder().setValue(executorMemory).build()) .build() val command = CommandInfo.newBuilder() .setValue(execScript) @@ -151,7 +151,7 @@ private[spark] class MesosSchedulerBackend( def enoughMemory(o: Offer) = { val mem = getResource(o.getResourcesList, "mem") val slaveId = o.getSlaveId.getValue - mem >= EXECUTOR_MEMORY || slaveIdsWithExecutors.contains(slaveId) + mem >= executorMemory || slaveIdsWithExecutors.contains(slaveId) } for ((offer, index) <- offers.zipWithIndex if enoughMemory(offer)) { From 7aba123f0c0fd024105462b3a0b203cd357c67e9 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Sat, 2 Feb 2013 13:53:28 -0600 Subject: [PATCH 267/291] Further simplify checking for Nil. --- core/src/main/scala/spark/scheduler/DAGScheduler.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index b62b25f688..2a646dd0f5 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -176,9 +176,7 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with def visit(rdd: RDD[_]) { if (!visited(rdd)) { visited += rdd - val locs = getCacheLocs(rdd) - val atLeastOneMissing = (0 until rdd.splits.size).exists(locs(_) == Nil) - if (atLeastOneMissing) { + if (getCacheLocs(rdd).contains(Nil)) { for (dep <- rdd.dependencies) { dep match { case shufDep: ShuffleDependency[_,_] => From 34a7bcdb3a19deed18b25225daf47ff22ee20869 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sat, 2 Feb 2013 19:40:30 -0800 Subject: [PATCH 268/291] Formatting --- .../main/scala/spark/scheduler/DAGScheduler.scala | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index 8cfc08e5ac..2a35915560 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -205,8 +205,9 @@ class DAGScheduler( missing.toList } - /** Returns (and does not) submit a JobSubmitted event suitable to run a given job, and - * a JobWaiter whose getResult() method will return the result of the job when it is complete. + /** + * Returns (and does not submit) a JobSubmitted event suitable to run a given job, and a + * JobWaiter whose getResult() method will return the result of the job when it is complete. * * The job is assumed to have at least one partition; zero partition jobs should be handled * without a JobSubmitted event. @@ -308,7 +309,8 @@ class DAGScheduler( return false } - /** Resubmit any failed stages. Ordinarily called after a small amount of time has passed since + /** + * Resubmit any failed stages. 
Ordinarily called after a small amount of time has passed since * the last fetch failure. */ private[scheduler] def resubmitFailedStages() { @@ -321,7 +323,8 @@ class DAGScheduler( } } - /** Check for waiting or failed stages which are now eligible for resubmission. + /** + * Check for waiting or failed stages which are now eligible for resubmission. * Ordinarily run on every iteration of the event loop. */ private[scheduler] def submitWaitingStages() { @@ -366,9 +369,9 @@ class DAGScheduler( // the same time, so we want to make sure we've identified all the reduce tasks that depend // on the failed node. if (failed.size > 0 && time > lastFetchFailureTime + RESUBMIT_TIMEOUT) { - resubmitFailedStages + resubmitFailedStages() } else { - submitWaitingStages + submitWaitingStages() } } } From 2415c18f48fc28d88f29b88c312f98054f530f20 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sun, 3 Feb 2013 06:44:11 +0000 Subject: [PATCH 269/291] Fix reporting of PySpark doctest failures. --- python/pyspark/context.py | 4 +++- python/pyspark/rdd.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 6831f9b7f8..657fe6f989 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -256,8 +256,10 @@ def _test(): globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) globs['tempdir'] = tempfile.mkdtemp() atexit.register(lambda: shutil.rmtree(globs['tempdir'])) - doctest.testmod(globs=globs) + (failure_count, test_count) = doctest.testmod(globs=globs) globs['sc'].stop() + if failure_count: + exit(-1) if __name__ == "__main__": diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 41ea6e6e14..fb144bc45d 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -748,8 +748,10 @@ def _test(): # The small batch size here ensures that we see multiple batches, # even in these small test examples: globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - doctest.testmod(globs=globs) + (failure_count, test_count) = doctest.testmod(globs=globs) globs['sc'].stop() + if failure_count: + exit(-1) if __name__ == "__main__": From 8fbd5380b7f36842297f624bad3a2513f7eca47b Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sun, 3 Feb 2013 06:44:49 +0000 Subject: [PATCH 270/291] Fetch fewer objects in PySpark's take() method. 
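The idea, sketched below under the simplifying assumption that an RDD is just a sequence of already-materialized partitions, is to pull partitions one at a time and take at most the number of still-missing elements from each, instead of collecting whole partitions:

    // Illustrative sketch only; `partitions` stands in for an RDD's partitions.
    def takeFromPartitions[T](partitions: Seq[Seq[T]], num: Int): Seq[T] = {
      val items = scala.collection.mutable.ArrayBuffer[T]()
      val it = partitions.iterator
      while (items.size < num && it.hasNext) {
        // Only request as many elements as are still needed.
        items ++= it.next().take(num - items.size)
      }
      items.toSeq
    }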
--- core/src/main/scala/spark/api/python/PythonRDD.scala | 11 +++++++++-- python/pyspark/rdd.py | 4 ++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/spark/api/python/PythonRDD.scala index 39758e94f4..ab8351e55e 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/spark/api/python/PythonRDD.scala @@ -238,6 +238,11 @@ private[spark] object PythonRDD { } def writeIteratorToPickleFile[T](items: java.util.Iterator[T], filename: String) { + import scala.collection.JavaConverters._ + writeIteratorToPickleFile(items.asScala, filename) + } + + def writeIteratorToPickleFile[T](items: Iterator[T], filename: String) { val file = new DataOutputStream(new FileOutputStream(filename)) for (item <- items) { writeAsPickle(item, file) @@ -245,8 +250,10 @@ private[spark] object PythonRDD { file.close() } - def takePartition[T](rdd: RDD[T], partition: Int): java.util.Iterator[T] = - rdd.context.runJob(rdd, ((x: Iterator[T]) => x), Seq(partition), true).head + def takePartition[T](rdd: RDD[T], partition: Int): Iterator[T] = { + implicit val cm : ClassManifest[T] = rdd.elementClassManifest + rdd.context.runJob(rdd, ((x: Iterator[T]) => x.toArray), Seq(partition), true).head.iterator + } } private object Pickle { diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index fb144bc45d..4cda6cf661 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -372,6 +372,10 @@ class RDD(object): items = [] for partition in range(self._jrdd.splits().size()): iterator = self.ctx._takePartition(self._jrdd.rdd(), partition) + # Each item in the iterator is a string, Python object, batch of + # Python objects. Regardless, it is sufficient to take `num` + # of these objects in order to collect `num` Python objects: + iterator = iterator.take(num) items.extend(self._collect_iterator_through_file(iterator)) if len(items) >= num: break From 9163c3705d98ca19c09fe5618e347b9d20f88f63 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sat, 2 Feb 2013 23:34:47 -0800 Subject: [PATCH 271/291] Formatting --- core/src/main/scala/spark/scheduler/DAGScheduler.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index 2a35915560..edbfd1c45f 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -265,7 +265,8 @@ class DAGScheduler( return listener.awaitResult() // Will throw an exception if the job fails } - /** Process one event retrieved from the event queue. + /** + * Process one event retrieved from the event queue. * Returns true if we should stop the event loop. */ private[scheduler] def processEvent(event: DAGSchedulerEvent): Boolean = { From e61729113d3bf165d1ab9bd83ea55d52fd0bb72e Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sun, 3 Feb 2013 21:29:40 -0800 Subject: [PATCH 272/291] Remove unnecessary doctest __main__ methods. 
--- python/pyspark/accumulators.py | 9 --------- python/pyspark/broadcast.py | 9 --------- 2 files changed, 18 deletions(-) diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py index 61fcbbd376..3e9d7d36da 100644 --- a/python/pyspark/accumulators.py +++ b/python/pyspark/accumulators.py @@ -196,12 +196,3 @@ def _start_update_server(): thread.daemon = True thread.start() return server - - -def _test(): - import doctest - doctest.testmod() - - -if __name__ == "__main__": - _test() diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py index 93876fa738..def810dd46 100644 --- a/python/pyspark/broadcast.py +++ b/python/pyspark/broadcast.py @@ -37,12 +37,3 @@ class Broadcast(object): def __reduce__(self): self._pickle_registry.add(self) return (_from_id, (self.bid, )) - - -def _test(): - import doctest - doctest.testmod() - - -if __name__ == "__main__": - _test() From aa4ee1e9e5485c1b96474e704c76225a2b8a7da9 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 4 Feb 2013 11:06:31 -0800 Subject: [PATCH 273/291] Fix failing test --- core/src/test/scala/spark/MapOutputTrackerSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/test/scala/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/spark/MapOutputTrackerSuite.scala index f4e7ec39fe..dd19442dcb 100644 --- a/core/src/test/scala/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/spark/MapOutputTrackerSuite.scala @@ -79,8 +79,7 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext { test("remote fetch") { try { System.clearProperty("spark.driver.host") // In case some previous test had set it - val (actorSystem, boundPort) = - AkkaUtils.createActorSystem("test", "localhost", 0) + val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", "localhost", 0) System.setProperty("spark.driver.port", boundPort.toString) val masterTracker = new MapOutputTracker(actorSystem, true) val slaveTracker = new MapOutputTracker(actorSystem, false) From f6ec547ea7b56ee607a4c2a69206f8952318eaf1 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 4 Feb 2013 13:14:54 -0800 Subject: [PATCH 274/291] Small fix to test for distinct --- core/src/test/scala/spark/RDDSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 89a3687386..fe7deb10d6 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -14,7 +14,7 @@ class RDDSuite extends FunSuite with LocalSparkContext { val dups = sc.makeRDD(Array(1, 1, 2, 2, 3, 3, 4, 4), 2) assert(dups.distinct().count() === 4) assert(dups.distinct.count === 4) // Can distinct and count be called without parentheses? - assert(dups.distinct().collect === dups.distinct().collect) + assert(dups.distinct.collect === dups.distinct().collect) assert(dups.distinct(2).collect === dups.distinct().collect) assert(nums.reduce(_ + _) === 10) assert(nums.fold(0)(_ + _) === 10) From 7eea64aa4c0d6a51406e0d1b039906ee9559cd58 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Tue, 5 Feb 2013 11:41:31 -0800 Subject: [PATCH 275/291] Streaming constructor which takes JavaSparkContext It's sometimes helpful to directly pass a JavaSparkContext, and take advantage of the various constructors available for that. 
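A hypothetical usage sketch of the new constructor (the object name and application name below are made up; only JavaStreamingContext, JavaSparkContext and Seconds come from this code base):

    import spark.api.java.JavaSparkContext
    import spark.streaming.Seconds
    import spark.streaming.api.java.JavaStreamingContext

    object StreamingFromExistingContext {
      def main(args: Array[String]) {
        // Reuse an existing JavaSparkContext instead of letting the
        // streaming context construct its own SparkContext.
        val jsc = new JavaSparkContext("local[2]", "StreamingFromExistingContext")
        val jssc = new JavaStreamingContext(jsc, Seconds(1))
        // ... define input streams and output operations on jssc here ...
      }
    }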
--- .../spark/streaming/api/java/JavaStreamingContext.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala index f82e6a37cc..e7f446a49b 100644 --- a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala @@ -33,6 +33,14 @@ class JavaStreamingContext(val ssc: StreamingContext) { def this(master: String, frameworkName: String, batchDuration: Duration) = this(new StreamingContext(master, frameworkName, batchDuration)) + /** + * Creates a StreamingContext. + * @param sparkContext The underlying JavaSparkContext to use + * @param batchDuration The time interval at which streaming data will be divided into batches + */ + def this(sparkContext: JavaSparkContext, batchDuration: Duration) = + this(new StreamingContext(sparkContext.sc, batchDuration)) + /** * Re-creates a StreamingContext from a checkpoint file. * @param path Path either to the directory that was specified as the checkpoint directory, or From 8bd0e888f377f13ac239df4ffd49fc666095e764 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 5 Feb 2013 17:50:25 -0600 Subject: [PATCH 276/291] Inline mergePair to look more like the narrow dep branch. No functionality changes, I think this is just more consistent given mergePair isn't called multiple times/recursive. Also added a comment to explain the usual case of having two parent RDDs. --- core/src/main/scala/spark/rdd/CoGroupedRDD.scala | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/spark/rdd/CoGroupedRDD.scala index 8fafd27bb6..4893fe8d78 100644 --- a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoGroupedRDD.scala @@ -84,6 +84,7 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[(_, _)]], part: Partitioner) override def compute(s: Split, context: TaskContext): Iterator[(K, Seq[Seq[_]])] = { val split = s.asInstanceOf[CoGroupSplit] val numRdds = split.deps.size + // e.g. for `(k, a) cogroup (k, b)`, K -> Seq(ArrayBuffer as, ArrayBuffer bs) val map = new JHashMap[K, Seq[ArrayBuffer[Any]]] def getSeq(k: K): Seq[ArrayBuffer[Any]] = { val seq = map.get(k) @@ -104,13 +105,10 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[(_, _)]], part: Partitioner) } case ShuffleCoGroupSplitDep(shuffleId) => { // Read map outputs of shuffle - def mergePair(pair: (K, Seq[Any])) { - val mySeq = getSeq(pair._1) - for (v <- pair._2) - mySeq(depNum) += v - } val fetcher = SparkEnv.get.shuffleFetcher - fetcher.fetch[K, Seq[Any]](shuffleId, split.index).foreach(mergePair) + for ((k, vs) <- fetcher.fetch[K, Seq[Any]](shuffleId, split.index)) { + getSeq(k)(depNum) ++= vs + } } } JavaConversions.mapAsScalaMap(map).iterator From 1ba3393ceb5709620a28b8bc01826153993fc444 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 5 Feb 2013 17:56:50 -0600 Subject: [PATCH 277/291] Increase DriverSuite timeout. 
--- core/src/test/scala/spark/DriverSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/spark/DriverSuite.scala b/core/src/test/scala/spark/DriverSuite.scala index 342610e1dd..5e84b3a66a 100644 --- a/core/src/test/scala/spark/DriverSuite.scala +++ b/core/src/test/scala/spark/DriverSuite.scala @@ -9,10 +9,11 @@ import org.scalatest.time.SpanSugar._ class DriverSuite extends FunSuite with Timeouts { test("driver should exit after finishing") { + assert(System.getenv("SPARK_HOME") != null) // Regression test for SPARK-530: "Spark driver process doesn't exit after finishing" val masters = Table(("master"), ("local"), ("local-cluster[2,1,512]")) forAll(masters) { (master: String) => - failAfter(10 seconds) { + failAfter(30 seconds) { Utils.execute(Seq("./run", "spark.DriverWithoutCleanup", master), new File(System.getenv("SPARK_HOME"))) } From 0e19093fd89ec9740f98cdcffd1ec09f4faf2490 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Tue, 5 Feb 2013 18:58:00 -0600 Subject: [PATCH 278/291] Handle Terminated to avoid endless DeathPactExceptions. Credit to Roland Kuhn, Akka's tech lead, for pointing out this various obvious fix, but StandaloneExecutorBackend.preStart's catch block would never (ever) get hit, because all of the operation's in preStart are async. So, the System.exit in the catch block was skipped, and instead Akka was sending Terminated messages which, since we didn't handle, it turned into DeathPactException, which started a postRestart/preStart infinite loop. --- .../scala/spark/deploy/worker/Worker.scala | 7 ++---- .../executor/StandaloneExecutorBackend.scala | 25 ++++++++----------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/core/src/main/scala/spark/deploy/worker/Worker.scala b/core/src/main/scala/spark/deploy/worker/Worker.scala index 8b41620d98..48177a638a 100644 --- a/core/src/main/scala/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/spark/deploy/worker/Worker.scala @@ -1,19 +1,16 @@ package spark.deploy.worker import scala.collection.mutable.{ArrayBuffer, HashMap} -import akka.actor.{ActorRef, Props, Actor} +import akka.actor.{ActorRef, Props, Actor, Terminated} import spark.{Logging, Utils} import spark.util.AkkaUtils import spark.deploy._ -import akka.remote.RemoteClientLifeCycleEvent +import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected} import java.text.SimpleDateFormat import java.util.Date -import akka.remote.RemoteClientShutdown -import akka.remote.RemoteClientDisconnected import spark.deploy.RegisterWorker import spark.deploy.LaunchExecutor import spark.deploy.RegisterWorkerFailed -import akka.actor.Terminated import java.io.File private[spark] class Worker( diff --git a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala index e45288ff53..224c126fdd 100644 --- a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala +++ b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala @@ -4,16 +4,15 @@ import java.nio.ByteBuffer import spark.Logging import spark.TaskState.TaskState import spark.util.AkkaUtils -import akka.actor.{ActorRef, Actor, Props} +import akka.actor.{ActorRef, Actor, Props, Terminated} +import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected} import java.util.concurrent.{TimeUnit, ThreadPoolExecutor, SynchronousQueue} -import akka.remote.RemoteClientLifeCycleEvent import 
spark.scheduler.cluster._ import spark.scheduler.cluster.RegisteredExecutor import spark.scheduler.cluster.LaunchTask import spark.scheduler.cluster.RegisterExecutorFailed import spark.scheduler.cluster.RegisterExecutor - private[spark] class StandaloneExecutorBackend( executor: Executor, driverUrl: String, @@ -27,17 +26,11 @@ private[spark] class StandaloneExecutorBackend( var driver: ActorRef = null override def preStart() { - try { - logInfo("Connecting to driver: " + driverUrl) - driver = context.actorFor(driverUrl) - driver ! RegisterExecutor(executorId, hostname, cores) - context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) - context.watch(driver) // Doesn't work with remote actors, but useful for testing - } catch { - case e: Exception => - logError("Failed to connect to driver", e) - System.exit(1) - } + logInfo("Connecting to driver: " + driverUrl) + driver = context.actorFor(driverUrl) + driver ! RegisterExecutor(executorId, hostname, cores) + context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) + context.watch(driver) // Doesn't work with remote actors, but useful for testing } override def receive = { @@ -52,6 +45,10 @@ private[spark] class StandaloneExecutorBackend( case LaunchTask(taskDesc) => logInfo("Got assigned task " + taskDesc.taskId) executor.launchTask(this, taskDesc.taskId, taskDesc.serializedTask) + + case Terminated(_) | RemoteClientDisconnected(_, _) | RemoteClientShutdown(_, _) => + logError("Driver terminated or disconnected! Shutting down.") + System.exit(1) } override def statusUpdate(taskId: Long, state: TaskState, data: ByteBuffer) { From d55e3aa467ab7d406739255bd8dc3dfc60f3cb16 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Thu, 7 Feb 2013 13:59:18 -0800 Subject: [PATCH 279/291] Updated JavaStreamingContext with updated kafkaStream API. --- .../api/java/JavaStreamingContext.scala | 26 +++++++------------ 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala index f82e6a37cc..70d6bd2b1b 100644 --- a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala @@ -45,27 +45,24 @@ class JavaStreamingContext(val ssc: StreamingContext) { /** * Create an input stream that pulls messages form a Kafka Broker. - * @param hostname Zookeper hostname. - * @param port Zookeper port. + * @param zkQuorum Zookeper quorum (hostname:port,hostname:port,..). * @param groupId The group id for this consumer. * @param topics Map of (topic_name -> numPartitions) to consume. Each partition is consumed * in its own thread. */ def kafkaStream[T]( - hostname: String, - port: Int, + zkQuorum: String, groupId: String, topics: JMap[String, JInt]) : JavaDStream[T] = { implicit val cmt: ClassManifest[T] = implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]] - ssc.kafkaStream[T](hostname, port, groupId, Map(topics.mapValues(_.intValue()).toSeq: _*)) + ssc.kafkaStream[T](zkQuorum, groupId, Map(topics.mapValues(_.intValue()).toSeq: _*)) } /** * Create an input stream that pulls messages form a Kafka Broker. - * @param hostname Zookeper hostname. - * @param port Zookeper port. + * @param zkQuorum Zookeper quorum (hostname:port,hostname:port,..). * @param groupId The group id for this consumer. * @param topics Map of (topic_name -> numPartitions) to consume. 
Each partition is consumed * in its own thread. @@ -73,8 +70,7 @@ class JavaStreamingContext(val ssc: StreamingContext) { * By default the value is pulled from zookeper. */ def kafkaStream[T]( - hostname: String, - port: Int, + zkQuorum: String, groupId: String, topics: JMap[String, JInt], initialOffsets: JMap[KafkaPartitionKey, JLong]) @@ -82,8 +78,7 @@ class JavaStreamingContext(val ssc: StreamingContext) { implicit val cmt: ClassManifest[T] = implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]] ssc.kafkaStream[T]( - hostname, - port, + zkQuorum, groupId, Map(topics.mapValues(_.intValue()).toSeq: _*), Map(initialOffsets.mapValues(_.longValue()).toSeq: _*)) @@ -91,8 +86,7 @@ class JavaStreamingContext(val ssc: StreamingContext) { /** * Create an input stream that pulls messages form a Kafka Broker. - * @param hostname Zookeper hostname. - * @param port Zookeper port. + * @param zkQuorum Zookeper quorum (hostname:port,hostname:port,..). * @param groupId The group id for this consumer. * @param topics Map of (topic_name -> numPartitions) to consume. Each partition is consumed * in its own thread. @@ -101,8 +95,7 @@ class JavaStreamingContext(val ssc: StreamingContext) { * @param storageLevel RDD storage level. Defaults to memory-only */ def kafkaStream[T]( - hostname: String, - port: Int, + zkQuorum: String, groupId: String, topics: JMap[String, JInt], initialOffsets: JMap[KafkaPartitionKey, JLong], @@ -111,8 +104,7 @@ class JavaStreamingContext(val ssc: StreamingContext) { implicit val cmt: ClassManifest[T] = implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]] ssc.kafkaStream[T]( - hostname, - port, + zkQuorum, groupId, Map(topics.mapValues(_.intValue()).toSeq: _*), Map(initialOffsets.mapValues(_.longValue()).toSeq: _*), From 99a5fc498acf3de14d754f8dda0df6bb81dd9595 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Sat, 9 Feb 2013 15:18:05 -0800 Subject: [PATCH 280/291] Added an initial spark job to ensure worker nodes are initialized. --- .../main/scala/spark/streaming/NetworkInputTracker.scala | 7 ++++++- .../src/test/scala/spark/streaming/InputStreamsSuite.scala | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala b/streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala index e4152f3a61..b54f53b203 100644 --- a/streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala +++ b/streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala @@ -4,6 +4,7 @@ import spark.streaming.dstream.{NetworkInputDStream, NetworkReceiver} import spark.streaming.dstream.{StopReceiver, ReportBlock, ReportError} import spark.Logging import spark.SparkEnv +import spark.SparkContext._ import scala.collection.mutable.HashMap import scala.collection.mutable.Queue @@ -138,8 +139,12 @@ class NetworkInputTracker( } iterator.next().start() } + // Run the dummy Spark job to ensure that all slaves have registered. + // This avoids all the receivers to be scheduled on the same node. + //ssc.sparkContext.makeRDD(1 to 100, 100).map(x => (x, 1)).reduceByKey(_ + _, 20).collect() + // Distribute the receivers and start them - ssc.sc.runJob(tempRDD, startReceiver) + ssc.sparkContext.runJob(tempRDD, startReceiver) } /** Stops the receivers. 
*/ diff --git a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala index c442210004..0eb9c7b81e 100644 --- a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala @@ -95,7 +95,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] val input = Seq(1, 2, 3, 4, 5) - + Thread.sleep(1000) val transceiver = new NettyTransceiver(new InetSocketAddress("localhost", 33333)); val client = SpecificRequestor.getClient( classOf[AvroSourceProtocol], transceiver); From 16baea62bce62987158acce0595a0916c25b32b2 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Sun, 10 Feb 2013 19:14:49 -0800 Subject: [PATCH 281/291] Fixed bug in CheckpointRDD to prevent exception when the original RDD had zero splits. --- core/src/main/scala/spark/rdd/CheckpointRDD.scala | 4 ++-- core/src/test/scala/spark/CheckpointSuite.scala | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CheckpointRDD.scala b/core/src/main/scala/spark/rdd/CheckpointRDD.scala index 96b593ba7c..a21338f85f 100644 --- a/core/src/main/scala/spark/rdd/CheckpointRDD.scala +++ b/core/src/main/scala/spark/rdd/CheckpointRDD.scala @@ -24,8 +24,8 @@ class CheckpointRDD[T: ClassManifest](sc: SparkContext, val checkpointPath: Stri val dirContents = fs.listStatus(new Path(checkpointPath)) val splitFiles = dirContents.map(_.getPath.toString).filter(_.contains("part-")).sorted val numSplits = splitFiles.size - if (!splitFiles(0).endsWith(CheckpointRDD.splitIdToFile(0)) || - !splitFiles(numSplits-1).endsWith(CheckpointRDD.splitIdToFile(numSplits-1))) { + if (numSplits > 0 && (!splitFiles(0).endsWith(CheckpointRDD.splitIdToFile(0)) || + !splitFiles(numSplits-1).endsWith(CheckpointRDD.splitIdToFile(numSplits-1)))) { throw new SparkException("Invalid checkpoint directory: " + checkpointPath) } Array.tabulate(numSplits)(i => new CheckpointRDDSplit(i)) diff --git a/core/src/test/scala/spark/CheckpointSuite.scala b/core/src/test/scala/spark/CheckpointSuite.scala index 0b74607fb8..4425949f46 100644 --- a/core/src/test/scala/spark/CheckpointSuite.scala +++ b/core/src/test/scala/spark/CheckpointSuite.scala @@ -162,6 +162,16 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { rdd => new ZippedRDD(sc, rdd, rdd.map(x => x)), true, false) } + test("CheckpointRDD with zero partitions") { + val rdd = new BlockRDD[Int](sc, Array[String]()) + assert(rdd.splits.size === 0) + assert(rdd.isCheckpointed === false) + rdd.checkpoint() + assert(rdd.count() === 0) + assert(rdd.isCheckpointed === true) + assert(rdd.splits.size === 0) + } + /** * Test checkpointing of the final RDD generated by the given operation. By default, * this method tests whether the size of serialized RDD has reduced after checkpointing or not. From fd90daf850a922fe33c3638b18304d827953e2cb Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Sun, 10 Feb 2013 19:48:42 -0800 Subject: [PATCH 282/291] Fixed bugs in FileInputDStream and Scheduler that occasionally failed to reprocess old files after recovering from master failure. Completely modified spark.streaming.FailureTest to test multiple master failures using file input stream. 
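The core of the Scheduler change is to replay the checkpointed pending batch times in increasing order when recovering, before restarting the timer. A simplified stand-in sketch (BatchTime and replayPending are illustrative names, not the real classes):

    // Replay pending batches oldest-first, mirroring the sorted(Time.ordering)
    // call added to Scheduler in the diff below.
    case class BatchTime(millis: Long)

    def replayPending(pendingTimes: Seq[BatchTime], runBatch: BatchTime => Unit) {
      val oldestFirst = Ordering.by((t: BatchTime) => t.millis)
      pendingTimes.sorted(oldestFirst).foreach(runBatch)
    }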
--- .../scala/spark/streaming/DStreamGraph.scala | 2 + .../scala/spark/streaming/JobManager.scala | 4 +- .../scala/spark/streaming/Scheduler.scala | 8 +- .../src/main/scala/spark/streaming/Time.scala | 4 + .../streaming/dstream/FileInputDStream.scala | 13 +- .../scala/spark/streaming/FailureSuite.scala | 283 +++++++++++++----- 6 files changed, 222 insertions(+), 92 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/spark/streaming/DStreamGraph.scala index d5a5496839..7aa9d20004 100644 --- a/streaming/src/main/scala/spark/streaming/DStreamGraph.scala +++ b/streaming/src/main/scala/spark/streaming/DStreamGraph.scala @@ -81,12 +81,14 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { private[streaming] def generateRDDs(time: Time): Seq[Job] = { this.synchronized { + logInfo("Generating RDDs for time " + time) outputStreams.flatMap(outputStream => outputStream.generateJob(time)) } } private[streaming] def forgetOldRDDs(time: Time) { this.synchronized { + logInfo("Forgetting old RDDs for time " + time) outputStreams.foreach(_.forgetOldMetadata(time)) } } diff --git a/streaming/src/main/scala/spark/streaming/JobManager.scala b/streaming/src/main/scala/spark/streaming/JobManager.scala index 5acdd01e58..8b18c7bc6a 100644 --- a/streaming/src/main/scala/spark/streaming/JobManager.scala +++ b/streaming/src/main/scala/spark/streaming/JobManager.scala @@ -15,8 +15,8 @@ class JobManager(ssc: StreamingContext, numThreads: Int = 1) extends Logging { SparkEnv.set(ssc.env) try { val timeTaken = job.run() - logInfo("Total delay: %.5f s for job %s (execution: %.5f s)".format( - (System.currentTimeMillis() - job.time.milliseconds) / 1000.0, job.id, timeTaken / 1000.0)) + logInfo("Total delay: %.5f s for job %s of time %s (execution: %.5f s)".format( + (System.currentTimeMillis() - job.time.milliseconds) / 1000.0, job.id, job.time.milliseconds, timeTaken / 1000.0)) } catch { case e: Exception => logError("Running " + job + " failed", e) diff --git a/streaming/src/main/scala/spark/streaming/Scheduler.scala b/streaming/src/main/scala/spark/streaming/Scheduler.scala index b77986a3ba..23a0f0974d 100644 --- a/streaming/src/main/scala/spark/streaming/Scheduler.scala +++ b/streaming/src/main/scala/spark/streaming/Scheduler.scala @@ -40,7 +40,11 @@ class Scheduler(ssc: StreamingContext) extends Logging { clock.asInstanceOf[ManualClock].setTime(lastTime + jumpTime) } // Reschedule the batches that were received but not processed before failure - ssc.initialCheckpoint.pendingTimes.foreach(time => generateRDDs(time)) + //ssc.initialCheckpoint.pendingTimes.foreach(time => generateRDDs(time)) + val pendingTimes = ssc.initialCheckpoint.pendingTimes.sorted(Time.ordering) + println(pendingTimes.mkString(", ")) + pendingTimes.foreach(time => + graph.generateRDDs(time).foreach(jobManager.runJob)) // Restart the timer timer.restart(graph.zeroTime.milliseconds) logInfo("Scheduler's timer restarted") @@ -64,11 +68,11 @@ class Scheduler(ssc: StreamingContext) extends Logging { graph.generateRDDs(time).foreach(jobManager.runJob) graph.forgetOldRDDs(time) doCheckpoint(time) - logInfo("Generated RDDs for time " + time) } private def doCheckpoint(time: Time) { if (ssc.checkpointDuration != null && (time - graph.zeroTime).isMultipleOf(ssc.checkpointDuration)) { + logInfo("Checkpointing graph for time " + time) val startTime = System.currentTimeMillis() ssc.graph.updateCheckpointData(time) checkpointWriter.write(new Checkpoint(ssc, time)) diff 
--git a/streaming/src/main/scala/spark/streaming/Time.scala b/streaming/src/main/scala/spark/streaming/Time.scala index 5daeb761dd..8a6c9a5cb5 100644 --- a/streaming/src/main/scala/spark/streaming/Time.scala +++ b/streaming/src/main/scala/spark/streaming/Time.scala @@ -39,4 +39,8 @@ case class Time(private val millis: Long) { override def toString: String = (millis.toString + " ms") +} + +object Time { + val ordering = Ordering.by((time: Time) => time.millis) } \ No newline at end of file diff --git a/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala index c6ffb252ce..10ccb4318d 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala @@ -128,7 +128,7 @@ class FileInputDStream[K: ClassManifest, V: ClassManifest, F <: NewInputFormat[K private[streaming] class FileInputDStreamCheckpointData extends DStreamCheckpointData(this) { - def hadoopFiles = data.asInstanceOf[HashMap[Time, Array[String]]] + def hadoopFiles = data.asInstanceOf[HashMap[Time, Array[String]]] override def update() { hadoopFiles.clear() @@ -139,11 +139,12 @@ class FileInputDStream[K: ClassManifest, V: ClassManifest, F <: NewInputFormat[K override def restore() { hadoopFiles.foreach { - case (time, files) => { - logInfo("Restoring Hadoop RDD for time " + time + " from files " + - files.mkString("[", ",", "]") ) - files - generatedRDDs += ((time, filesToRDD(files))) + case (t, f) => { + // Restore the metadata in both files and generatedRDDs + logInfo("Restoring files for time " + t + " - " + + f.mkString("[", ", ", "]") ) + files += ((t, f)) + generatedRDDs += ((t, filesToRDD(f))) } } } diff --git a/streaming/src/test/scala/spark/streaming/FailureSuite.scala b/streaming/src/test/scala/spark/streaming/FailureSuite.scala index c4cfffbfc1..efaa098d2e 100644 --- a/streaming/src/test/scala/spark/streaming/FailureSuite.scala +++ b/streaming/src/test/scala/spark/streaming/FailureSuite.scala @@ -1,58 +1,58 @@ package spark.streaming -import org.scalatest.BeforeAndAfter +import org.scalatest.{FunSuite, BeforeAndAfter} import org.apache.commons.io.FileUtils import java.io.File import scala.runtime.RichInt import scala.util.Random import spark.streaming.StreamingContext._ -import collection.mutable.ArrayBuffer +import collection.mutable.{SynchronizedBuffer, ArrayBuffer} import spark.Logging +import com.google.common.io.Files /** * This testsuite tests master failures at random times while the stream is running using * the real clock. 
*/ -class FailureSuite extends TestSuiteBase with BeforeAndAfter { +class FailureSuite extends FunSuite with BeforeAndAfter with Logging { + + var testDir: File = null + var checkpointDir: File = null + val batchDuration = Milliseconds(500) before { - FileUtils.deleteDirectory(new File(checkpointDir)) + testDir = Files.createTempDir() + checkpointDir = Files.createTempDir() } after { FailureSuite.reset() - FileUtils.deleteDirectory(new File(checkpointDir)) + FileUtils.deleteDirectory(checkpointDir) + FileUtils.deleteDirectory(testDir) // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown System.clearProperty("spark.driver.port") } - override def framework = "CheckpointSuite" - - override def batchDuration = Milliseconds(500) - - override def checkpointDir = "checkpoint" - - override def checkpointInterval = batchDuration - test("multiple failures with updateStateByKey") { val n = 30 // Input: time=1 ==> [ a ] , time=2 ==> [ a, a ] , time=3 ==> [ a, a, a ] , ... - val input = (1 to n).map(i => (1 to i).map(_ =>"a").toSeq).toSeq - // Last output: [ (a, 465) ] for n=30 - val lastOutput = Seq( ("a", (1 to n).reduce(_ + _)) ) + val input = (1 to n).map(i => (1 to i).map(_ => "a").mkString(" ")).toSeq + // Expected output: time=1 ==> [ (a, 1) ] , time=2 ==> [ (a, 3) ] , time=3 ==> [ (a,6) ] , ... + val expectedOutput = (1 to n).map(i => (1 to i).reduce(_ + _)).map(j => ("a", j)) val operation = (st: DStream[String]) => { val updateFunc = (values: Seq[Int], state: Option[RichInt]) => { Some(new RichInt(values.foldLeft(0)(_ + _) + state.map(_.self).getOrElse(0))) } - st.map(x => (x, 1)) - .updateStateByKey[RichInt](updateFunc) - .checkpoint(Seconds(2)) - .map(t => (t._1, t._2.self)) + st.flatMap(_.split(" ")) + .map(x => (x, 1)) + .updateStateByKey[RichInt](updateFunc) + .checkpoint(Seconds(2)) + .map(t => (t._1, t._2.self)) } - testOperationWithMultipleFailures(input, operation, lastOutput, n, n) + testOperationWithMultipleFailures(input, operation, expectedOutput) } test("multiple failures with reduceByKeyAndWindow") { @@ -60,17 +60,18 @@ class FailureSuite extends TestSuiteBase with BeforeAndAfter { val w = 100 assert(w > n, "Window should be much larger than the number of input sets in this test") // Input: time=1 ==> [ a ] , time=2 ==> [ a, a ] , time=3 ==> [ a, a, a ] , ... - val input = (1 to n).map(i => (1 to i).map(_ =>"a").toSeq).toSeq - // Last output: [ (a, 465) ] - val lastOutput = Seq( ("a", (1 to n).reduce(_ + _)) ) + val input = (1 to n).map(i => (1 to i).map(_ => "a").mkString(" ")).toSeq + // Expected output: time=1 ==> [ (a, 1) ] , time=2 ==> [ (a, 3) ] , time=3 ==> [ (a,6) ] , ... + val expectedOutput = (1 to n).map(i => (1 to i).reduce(_ + _)).map(j => ("a", j)) val operation = (st: DStream[String]) => { - st.map(x => (x, 1)) + st.flatMap(_.split(" ")) + .map(x => (x, 1)) .reduceByKeyAndWindow(_ + _, _ - _, batchDuration * w, batchDuration) .checkpoint(Seconds(2)) } - testOperationWithMultipleFailures(input, operation, lastOutput, n, n) + testOperationWithMultipleFailures(input, operation, expectedOutput) } @@ -79,113 +80,231 @@ class FailureSuite extends TestSuiteBase with BeforeAndAfter { * final set of output values is as expected or not. Checking the final value is * proof that no intermediate data was lost due to master failures. 
*/ - def testOperationWithMultipleFailures[U: ClassManifest, V: ClassManifest]( - input: Seq[Seq[U]], - operation: DStream[U] => DStream[V], - lastExpectedOutput: Seq[V], - numBatches: Int, - numExpectedOutput: Int + def testOperationWithMultipleFailures( + input: Seq[String], + operation: DStream[String] => DStream[(String, Int)], + expectedOutput: Seq[(String, Int)] ) { - var ssc = setupStreams[U, V](input, operation) - val mergedOutput = new ArrayBuffer[Seq[V]]() + var ssc = setupStreamsWithFileStream(operation) + val mergedOutput = new ArrayBuffer[(String, Int)]() + val lastExpectedOutput = expectedOutput.last + + val maxTimeToRun = expectedOutput.size * batchDuration.milliseconds * 2 var totalTimeRan = 0L - while(totalTimeRan <= numBatches * batchDuration.milliseconds * 2) { - new KillingThread(ssc, numBatches * batchDuration.milliseconds.toInt / 4).start() - val (output, timeRan) = runStreamsWithRealClock[V](ssc, numBatches, numExpectedOutput) + // Start generating files in the a different thread + val fileGeneratingThread = new FileGeneratingThread(input, testDir.getPath, batchDuration.milliseconds) + fileGeneratingThread.start() + + // Repeatedly start and kill the streaming context until timed out or + // all expected output is generated + while(!FailureSuite.outputGenerated && !FailureSuite.timedOut) { + + // Start the thread to kill the streaming after some time + FailureSuite.failed = false + val killingThread = new KillingThread(ssc, batchDuration.milliseconds * 10) + killingThread.start() + + // Run the streams with real clock until last expected output is seen or timed out + val (output, timeRan) = runStreamsWithRealClock(ssc, lastExpectedOutput, maxTimeToRun - totalTimeRan) + if (killingThread.isAlive) killingThread.interrupt() + + // Merge output and time ran and see whether already timed out or not mergedOutput ++= output totalTimeRan += timeRan logInfo("New output = " + output) logInfo("Merged output = " + mergedOutput) logInfo("Total time spent = " + totalTimeRan) - val sleepTime = Random.nextInt(numBatches * batchDuration.milliseconds.toInt / 8) - logInfo( - "\n-------------------------------------------\n" + - " Restarting stream computation in " + sleepTime + " ms " + - "\n-------------------------------------------\n" - ) - Thread.sleep(sleepTime) - FailureSuite.failed = false - ssc = new StreamingContext(checkpointDir) + if (totalTimeRan > maxTimeToRun) { + FailureSuite.timedOut = true + } + + if (!FailureSuite.outputGenerated && !FailureSuite.timedOut) { + val sleepTime = Random.nextInt(batchDuration.milliseconds.toInt * 2) + logInfo( + "\n-------------------------------------------\n" + + " Restarting stream computation in " + sleepTime + " ms " + + "\n-------------------------------------------\n" + ) + Thread.sleep(sleepTime) + } + + // Recreate the streaming context from checkpoint + ssc = new StreamingContext(checkpointDir.getPath) } ssc.stop() ssc = null + logInfo("Finished test after " + FailureSuite.failureCount + " failures") - // Verify whether the last output is the expected one - val lastOutput = mergedOutput(mergedOutput.lastIndexWhere(!_.isEmpty)) - assert(lastOutput.toSet === lastExpectedOutput.toSet) - logInfo("Finished computation after " + FailureSuite.failureCount + " failures") + if (FailureSuite.timedOut) { + logWarning("Timed out with run time of "+ maxTimeToRun + " ms for " + + expectedOutput.size + " batches of " + batchDuration) + } + + // Verify whether the output is as expected + verifyOutput(mergedOutput, expectedOutput) + if 
(fileGeneratingThread.isAlive) fileGeneratingThread.interrupt() + } + + /** Sets up the stream operations with file input stream */ + def setupStreamsWithFileStream( + operation: DStream[String] => DStream[(String, Int)] + ): StreamingContext = { + val ssc = new StreamingContext("local[4]", "FailureSuite", batchDuration) + ssc.checkpoint(checkpointDir.getPath) + val inputStream = ssc.textFileStream(testDir.getPath) + val operatedStream = operation(inputStream) + val outputBuffer = new ArrayBuffer[Seq[(String, Int)]] with SynchronizedBuffer[Seq[(String, Int)]] + val outputStream = new TestOutputStream(operatedStream, outputBuffer) + ssc.registerOutputStream(outputStream) + ssc } /** - * Runs the streams set up in `ssc` on real clock until the expected max number of + * Runs the streams set up in `ssc` on real clock. */ - def runStreamsWithRealClock[V: ClassManifest]( - ssc: StreamingContext, - numBatches: Int, - maxExpectedOutput: Int - ): (Seq[Seq[V]], Long) = { + def runStreamsWithRealClock( + ssc: StreamingContext, + lastExpectedOutput: (String, Int), + timeout: Long + ): (Seq[(String, Int)], Long) = { System.clearProperty("spark.streaming.clock") - assert(numBatches > 0, "Number of batches to run stream computation is zero") - assert(maxExpectedOutput > 0, "Max expected outputs after " + numBatches + " is zero") - logInfo("numBatches = " + numBatches + ", maxExpectedOutput = " + maxExpectedOutput) - // Get the output buffer - val outputStream = ssc.graph.getOutputStreams.head.asInstanceOf[TestOutputStream[V]] + val outputStream = ssc.graph.getOutputStreams.head.asInstanceOf[TestOutputStream[(String, Int)]] val output = outputStream.output - val waitTime = (batchDuration.milliseconds * (numBatches.toDouble + 0.5)).toLong val startTime = System.currentTimeMillis() - try { - // Start computation - ssc.start() + // Functions to detect various conditions + def hasFailed = FailureSuite.failed + def isLastOutputGenerated = !output.flatMap(x => x).isEmpty && output(output.lastIndexWhere(!_.isEmpty)).head == lastExpectedOutput + def isTimedOut = System.currentTimeMillis() - startTime > timeout - // Wait until expected number of output items have been generated - while (output.size < maxExpectedOutput && System.currentTimeMillis() - startTime < waitTime && !FailureSuite.failed) { - logInfo("output.size = " + output.size + ", maxExpectedOutput = " + maxExpectedOutput) + // Start the streaming computation and let it run while ... 
+ // (i) StreamingContext has not been shut down yet + // (ii) The last expected output has not been generated yet + // (iii) Its not timed out yet + try { + ssc.start() + while (!hasFailed && !isLastOutputGenerated && !isTimedOut) { Thread.sleep(100) } + logInfo("Has failed = " + hasFailed) + logInfo("Is last output generated = " + isLastOutputGenerated) + logInfo("Is timed out = " + isTimedOut) } catch { case e: Exception => logInfo("Exception while running streams: " + e) } finally { ssc.stop() } + + // Verify whether the output of each batch has only one element + assert(output.forall(_.size <= 1), "output of each batch should have only one element") + + // Set appropriate flags is timed out or output has been generated + if (isTimedOut) FailureSuite.timedOut = true + if (isLastOutputGenerated) FailureSuite.outputGenerated = true + val timeTaken = System.currentTimeMillis() - startTime logInfo("" + output.size + " sets of output generated in " + timeTaken + " ms") - (output, timeTaken) + (output.flatMap(_.headOption), timeTaken) } + /** + * Verifies the output value are the same as expected. Since failures can lead to + * a batch being processed twice, a batches output may appear more than once + * consecutively. To avoid getting confused with those, we eliminate consecutive + * duplicate batch outputs of values from the `output`. As a result, the + * expected output should not have consecutive batches with the same values as output. + */ + def verifyOutput(output: Seq[(String, Int)], expectedOutput: Seq[(String, Int)]) { + // Verify whether expected outputs do not consecutive batches with same output + for (i <- 0 until expectedOutput.size - 1) { + assert(expectedOutput(i) != expectedOutput(i+1), + "Expected output has consecutive duplicate sequence of values") + } + // Match the output with the expected output + logInfo( + "\n-------------------------------------------\n" + + " Verifying output " + + "\n-------------------------------------------\n" + ) + logInfo("Expected output, size = " + expectedOutput.size) + logInfo(expectedOutput.mkString("[", ",", "]")) + logInfo("Output, size = " + output.size) + logInfo(output.mkString("[", ",", "]")) + output.foreach(o => + assert(expectedOutput.contains(o), "Expected value " + o + " not found") + ) + } } object FailureSuite { var failed = false + var outputGenerated = false + var timedOut = false var failureCount = 0 def reset() { failed = false + outputGenerated = false + timedOut = false failureCount = 0 } } -class KillingThread(ssc: StreamingContext, maxKillWaitTime: Int) extends Thread with Logging { +/** + * Thread to kill streaming context after some time. 
+ */ +class KillingThread(ssc: StreamingContext, maxKillWaitTime: Long) extends Thread with Logging { initLogging() override def run() { - var minKillWaitTime = if (FailureSuite.failureCount == 0) 3000 else 1000 // to allow the first checkpoint - val killWaitTime = minKillWaitTime + Random.nextInt(maxKillWaitTime) - logInfo("Kill wait time = " + killWaitTime) - Thread.sleep(killWaitTime.toLong) - logInfo( - "\n---------------------------------------\n" + - "Killing streaming context after " + killWaitTime + " ms" + - "\n---------------------------------------\n" - ) - if (ssc != null) ssc.stop() - FailureSuite.failed = true - FailureSuite.failureCount += 1 + try { + var minKillWaitTime = if (FailureSuite.failureCount == 0) 5000 else 1000 // to allow the first checkpoint + val killWaitTime = minKillWaitTime + math.abs(Random.nextLong % maxKillWaitTime) + logInfo("Kill wait time = " + killWaitTime) + Thread.sleep(killWaitTime) + logInfo( + "\n---------------------------------------\n" + + "Killing streaming context after " + killWaitTime + " ms" + + "\n---------------------------------------\n" + ) + if (ssc != null) { + ssc.stop() + FailureSuite.failed = true + FailureSuite.failureCount += 1 + } + logInfo("Killing thread exited") + } catch { + case ie: InterruptedException => logInfo("Killing thread interrupted") + case e: Exception => logWarning("Exception in killing thread", e) + } } } + +/** + * Thread to generate input files periodically with the desired text + */ +class FileGeneratingThread(input: Seq[String], testDir: String, interval: Long) + extends Thread with Logging { + initLogging() + + override def run() { + try { + Thread.sleep(5000) // To make sure that all the streaming context has been set up + for (i <- 0 until input.size) { + FileUtils.writeStringToFile(new File(testDir, i.toString), input(i).toString + "\n") + Thread.sleep(interval) + } + logInfo("File generating thread exited") + } catch { + case ie: InterruptedException => logInfo("File generating thread interrupted") + case e: Exception => logWarning("File generating in killing thread", e) + } + } +} + From 39addd380363c0371e935fae50983fe87158c1ac Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Wed, 13 Feb 2013 12:17:45 -0800 Subject: [PATCH 283/291] Changed scheduler and file input stream to fix bugs in the driver fault tolerance. Added MasterFailureTest to rigorously test master fault tolerance with file input stream. 
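The rescheduling logic here works in two parts. On recovery the scheduler computes the batches that fell into the down time between the checkpoint time and the timer's restart time (checkpointTime.until(restartTime, batchDuration)), merges them with the batches that were still pending at checkpoint time, and re-runs the distinct, sorted result before the timer resumes. The restart time itself comes from the new RecurringTimer.getRestartTime. The sketch below is illustrative only, with made-up numbers, and shows that arithmetic in isolation: the timer resumes at the first period boundary, aligned to the original start time, that lies strictly after the current clock time.

    // Standalone illustration of the restart-time arithmetic added to RecurringTimer.
    // All of the concrete values below are assumptions for the example.
    object RestartTimeSketch {
      // First period boundary, aligned to originalStartTime, strictly after currentTime
      def getRestartTime(currentTime: Long, originalStartTime: Long, period: Long): Long = {
        val gap = currentTime - originalStartTime
        (math.floor(gap.toDouble / period).toLong + 1) * period + originalStartTime
      }

      def main(args: Array[String]) {
        val period = 1000L             // 1 second batch duration
        val originalStartTime = 5000L  // the graph's zero time
        val currentTime = 12300L       // clock reading once the driver is back up
        // Prints 13000: the next 1000 ms boundary after 12300, aligned to 5000
        println(getRestartTime(currentTime, originalStartTime, period))
      }
    }

Batches between the checkpoint time and that restart time are the down-time batches, and they are rescheduled together with the pending ones.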
--- .../main/scala/spark/streaming/DStream.scala | 23 +- .../streaming/DStreamCheckpointData.scala | 2 +- .../scala/spark/streaming/DStreamGraph.scala | 49 ++- .../scala/spark/streaming/JobManager.scala | 10 +- .../scala/spark/streaming/Scheduler.scala | 92 +++-- .../src/main/scala/spark/streaming/Time.scala | 10 + .../streaming/dstream/FileInputDStream.scala | 59 ++- .../dstream/NetworkInputDStream.scala | 11 +- .../streaming/util/MasterFailureTest.scala | 375 ++++++++++++++++++ .../spark/streaming/util/RecurringTimer.scala | 30 +- .../java/spark/streaming/JavaAPISuite.java | 21 +- streaming/src/test/resources/log4j.properties | 7 +- .../streaming/BasicOperationsSuite.scala | 2 + .../spark/streaming/CheckpointSuite.scala | 107 +++-- .../scala/spark/streaming/FailureSuite.scala | 304 +------------- .../spark/streaming/InputStreamsSuite.scala | 29 +- .../scala/spark/streaming/TestSuiteBase.scala | 12 +- .../streaming/WindowOperationsSuite.scala | 2 + 18 files changed, 693 insertions(+), 452 deletions(-) create mode 100644 streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala diff --git a/streaming/src/main/scala/spark/streaming/DStream.scala b/streaming/src/main/scala/spark/streaming/DStream.scala index 0eb6aad187..0c1b667c0a 100644 --- a/streaming/src/main/scala/spark/streaming/DStream.scala +++ b/streaming/src/main/scala/spark/streaming/DStream.scala @@ -292,7 +292,7 @@ abstract class DStream[T: ClassManifest] ( * Generate a SparkStreaming job for the given time. This is an internal method that * should not be called directly. This default implementation creates a job * that materializes the corresponding RDD. Subclasses of DStream may override this - * (eg. ForEachDStream). + * to generate their own jobs. */ protected[streaming] def generateJob(time: Time): Option[Job] = { getOrCompute(time) match { @@ -308,19 +308,18 @@ abstract class DStream[T: ClassManifest] ( } /** - * Dereference RDDs that are older than rememberDuration. + * Clear metadata that are older than `rememberDuration` of this DStream. + * This is an internal method that should not be called directly. This default + * implementation clears the old generated RDDs. Subclasses of DStream may override + * this to clear their own metadata along with the generated RDDs. */ - protected[streaming] def forgetOldMetadata(time: Time) { + protected[streaming] def clearOldMetadata(time: Time) { var numForgotten = 0 - generatedRDDs.keys.foreach(t => { - if (t <= (time - rememberDuration)) { - generatedRDDs.remove(t) - numForgotten += 1 - logInfo("Forgot RDD of time " + t + " from " + this) - } - }) - logInfo("Forgot " + numForgotten + " RDDs from " + this) - dependencies.foreach(_.forgetOldMetadata(time)) + val oldRDDs = generatedRDDs.filter(_._1 <= (time - rememberDuration)) + generatedRDDs --= oldRDDs.keys + logInfo("Cleared " + oldRDDs.size + " RDDs that were older than " + + (time - rememberDuration) + ": " + oldRDDs.keys.mkString(", ")) + dependencies.foreach(_.clearOldMetadata(time)) } /* Adds metadata to the Stream while it is running. 
diff --git a/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala b/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala index a375980b84..6b0fade7c6 100644 --- a/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala +++ b/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala @@ -87,7 +87,7 @@ class DStreamCheckpointData[T: ClassManifest] (dstream: DStream[T]) } override def toString() = { - "[\n" + checkpointFiles.size + "\n" + checkpointFiles.mkString("\n") + "\n]" + "[\n" + checkpointFiles.size + " checkpoint files \n" + checkpointFiles.mkString("\n") + "\n]" } } diff --git a/streaming/src/main/scala/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/spark/streaming/DStreamGraph.scala index 7aa9d20004..22d9e24f05 100644 --- a/streaming/src/main/scala/spark/streaming/DStreamGraph.scala +++ b/streaming/src/main/scala/spark/streaming/DStreamGraph.scala @@ -11,17 +11,20 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { private val inputStreams = new ArrayBuffer[InputDStream[_]]() private val outputStreams = new ArrayBuffer[DStream[_]]() - private[streaming] var zeroTime: Time = null - private[streaming] var batchDuration: Duration = null - private[streaming] var rememberDuration: Duration = null - private[streaming] var checkpointInProgress = false + var rememberDuration: Duration = null + var checkpointInProgress = false - private[streaming] def start(time: Time) { + var zeroTime: Time = null + var startTime: Time = null + var batchDuration: Duration = null + + def start(time: Time) { this.synchronized { if (zeroTime != null) { throw new Exception("DStream graph computation already started") } zeroTime = time + startTime = time outputStreams.foreach(_.initialize(zeroTime)) outputStreams.foreach(_.remember(rememberDuration)) outputStreams.foreach(_.validate) @@ -29,19 +32,23 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { } } - private[streaming] def stop() { + def restart(time: Time) { + this.synchronized { startTime = time } + } + + def stop() { this.synchronized { inputStreams.par.foreach(_.stop()) } } - private[streaming] def setContext(ssc: StreamingContext) { + def setContext(ssc: StreamingContext) { this.synchronized { outputStreams.foreach(_.setContext(ssc)) } } - private[streaming] def setBatchDuration(duration: Duration) { + def setBatchDuration(duration: Duration) { this.synchronized { if (batchDuration != null) { throw new Exception("Batch duration already set as " + batchDuration + @@ -51,61 +58,61 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { batchDuration = duration } - private[streaming] def remember(duration: Duration) { + def remember(duration: Duration) { this.synchronized { if (rememberDuration != null) { throw new Exception("Batch duration already set as " + batchDuration + ". 
cannot set it again.") } + rememberDuration = duration } - rememberDuration = duration } - private[streaming] def addInputStream(inputStream: InputDStream[_]) { + def addInputStream(inputStream: InputDStream[_]) { this.synchronized { inputStream.setGraph(this) inputStreams += inputStream } } - private[streaming] def addOutputStream(outputStream: DStream[_]) { + def addOutputStream(outputStream: DStream[_]) { this.synchronized { outputStream.setGraph(this) outputStreams += outputStream } } - private[streaming] def getInputStreams() = this.synchronized { inputStreams.toArray } + def getInputStreams() = this.synchronized { inputStreams.toArray } - private[streaming] def getOutputStreams() = this.synchronized { outputStreams.toArray } + def getOutputStreams() = this.synchronized { outputStreams.toArray } - private[streaming] def generateRDDs(time: Time): Seq[Job] = { + def generateRDDs(time: Time): Seq[Job] = { this.synchronized { logInfo("Generating RDDs for time " + time) outputStreams.flatMap(outputStream => outputStream.generateJob(time)) } } - private[streaming] def forgetOldRDDs(time: Time) { + def clearOldMetadata(time: Time) { this.synchronized { - logInfo("Forgetting old RDDs for time " + time) - outputStreams.foreach(_.forgetOldMetadata(time)) + logInfo("Clearing old metadata for time " + time) + outputStreams.foreach(_.clearOldMetadata(time)) } } - private[streaming] def updateCheckpointData(time: Time) { + def updateCheckpointData(time: Time) { this.synchronized { outputStreams.foreach(_.updateCheckpointData(time)) } } - private[streaming] def restoreCheckpointData() { + def restoreCheckpointData() { this.synchronized { outputStreams.foreach(_.restoreCheckpointData()) } } - private[streaming] def validate() { + def validate() { this.synchronized { assert(batchDuration != null, "Batch duration has not been set") //assert(batchDuration >= Milliseconds(100), "Batch duration of " + batchDuration + " is very low") diff --git a/streaming/src/main/scala/spark/streaming/JobManager.scala b/streaming/src/main/scala/spark/streaming/JobManager.scala index 8b18c7bc6a..649494ff4a 100644 --- a/streaming/src/main/scala/spark/streaming/JobManager.scala +++ b/streaming/src/main/scala/spark/streaming/JobManager.scala @@ -38,13 +38,19 @@ class JobManager(ssc: StreamingContext, numThreads: Int = 1) extends Logging { logInfo("Added " + job + " to queue") } + def stop() { + jobExecutor.shutdown() + } + private def clearJob(job: Job) { jobs.synchronized { - val jobsOfTime = jobs.get(job.time) + val time = job.time + val jobsOfTime = jobs.get(time) if (jobsOfTime.isDefined) { jobsOfTime.get -= job if (jobsOfTime.get.isEmpty) { - jobs -= job.time + ssc.scheduler.clearOldMetadata(time) + jobs -= time } } else { throw new Exception("Job finished for time " + job.time + diff --git a/streaming/src/main/scala/spark/streaming/Scheduler.scala b/streaming/src/main/scala/spark/streaming/Scheduler.scala index 23a0f0974d..57d494da83 100644 --- a/streaming/src/main/scala/spark/streaming/Scheduler.scala +++ b/streaming/src/main/scala/spark/streaming/Scheduler.scala @@ -9,11 +9,8 @@ class Scheduler(ssc: StreamingContext) extends Logging { initLogging() - val graph = ssc.graph - val concurrentJobs = System.getProperty("spark.streaming.concurrentJobs", "1").toInt val jobManager = new JobManager(ssc, concurrentJobs) - val checkpointWriter = if (ssc.checkpointDuration != null && ssc.checkpointDir != null) { new CheckpointWriter(ssc.checkpointDir) } else { @@ -24,53 +21,80 @@ class Scheduler(ssc: StreamingContext) extends 
Logging { val clock = Class.forName(clockClass).newInstance().asInstanceOf[Clock] val timer = new RecurringTimer(clock, ssc.graph.batchDuration.milliseconds, longTime => generateRDDs(new Time(longTime))) + val graph = ssc.graph - def start() { - // If context was started from checkpoint, then restart timer such that - // this timer's triggers occur at the same time as the original timer. - // Otherwise just start the timer from scratch, and initialize graph based - // on this first trigger time of the timer. + def start() = synchronized { if (ssc.isCheckpointPresent) { - // If manual clock is being used for testing, then - // either set the manual clock to the last checkpointed time, - // or if the property is defined set it to that time - if (clock.isInstanceOf[ManualClock]) { - val lastTime = ssc.initialCheckpoint.checkpointTime.milliseconds - val jumpTime = System.getProperty("spark.streaming.manualClock.jump", "0").toLong - clock.asInstanceOf[ManualClock].setTime(lastTime + jumpTime) - } - // Reschedule the batches that were received but not processed before failure - //ssc.initialCheckpoint.pendingTimes.foreach(time => generateRDDs(time)) - val pendingTimes = ssc.initialCheckpoint.pendingTimes.sorted(Time.ordering) - println(pendingTimes.mkString(", ")) - pendingTimes.foreach(time => - graph.generateRDDs(time).foreach(jobManager.runJob)) - // Restart the timer - timer.restart(graph.zeroTime.milliseconds) - logInfo("Scheduler's timer restarted") + restart() } else { - val firstTime = new Time(timer.start()) - graph.start(firstTime - ssc.graph.batchDuration) - logInfo("Scheduler's timer started") + startFirstTime() } logInfo("Scheduler started") } - def stop() { + def stop() = synchronized { timer.stop() - graph.stop() + jobManager.stop() + ssc.graph.stop() logInfo("Scheduler stopped") } - - private def generateRDDs(time: Time) { + + private def startFirstTime() { + val startTime = new Time(timer.getStartTime()) + graph.start(startTime - graph.batchDuration) + timer.start(startTime.milliseconds) + logInfo("Scheduler's timer started at " + startTime) + } + + private def restart() { + + // If manual clock is being used for testing, then + // either set the manual clock to the last checkpointed time, + // or if the property is defined set it to that time + if (clock.isInstanceOf[ManualClock]) { + val lastTime = ssc.initialCheckpoint.checkpointTime.milliseconds + val jumpTime = System.getProperty("spark.streaming.manualClock.jump", "0").toLong + clock.asInstanceOf[ManualClock].setTime(lastTime + jumpTime) + } + + val batchDuration = ssc.graph.batchDuration + + // Batches when the master was down, that is, + // between the checkpoint and current restart time + val checkpointTime = ssc.initialCheckpoint.checkpointTime + val restartTime = new Time(timer.getRestartTime(graph.zeroTime.milliseconds)) + val downTimes = checkpointTime.until(restartTime, batchDuration) + logInfo("Batches during down time: " + downTimes.mkString(", ")) + + // Batches that were unprocessed before failure + val pendingTimes = ssc.initialCheckpoint.pendingTimes + logInfo("Batches pending processing: " + pendingTimes.mkString(", ")) + // Reschedule jobs for these times + val timesToReschedule = (pendingTimes ++ downTimes).distinct.sorted(Time.ordering) + logInfo("Batches to reschedule: " + timesToReschedule.mkString(", ")) + timesToReschedule.foreach(time => + graph.generateRDDs(time).foreach(jobManager.runJob) + ) + + // Restart the timer + timer.start(restartTime.milliseconds) + logInfo("Scheduler's timer restarted") + 
} + + /** Generates the RDDs, clears old metadata and does checkpoint for the given time */ + def generateRDDs(time: Time) { SparkEnv.set(ssc.env) logInfo("\n-----------------------------------------------------\n") graph.generateRDDs(time).foreach(jobManager.runJob) - graph.forgetOldRDDs(time) doCheckpoint(time) } - private def doCheckpoint(time: Time) { + + def clearOldMetadata(time: Time) { + ssc.graph.clearOldMetadata(time) + } + + def doCheckpoint(time: Time) { if (ssc.checkpointDuration != null && (time - graph.zeroTime).isMultipleOf(ssc.checkpointDuration)) { logInfo("Checkpointing graph for time " + time) val startTime = System.currentTimeMillis() diff --git a/streaming/src/main/scala/spark/streaming/Time.scala b/streaming/src/main/scala/spark/streaming/Time.scala index 8a6c9a5cb5..8201e84a20 100644 --- a/streaming/src/main/scala/spark/streaming/Time.scala +++ b/streaming/src/main/scala/spark/streaming/Time.scala @@ -37,6 +37,16 @@ case class Time(private val millis: Long) { def max(that: Time): Time = if (this > that) this else that + def until(that: Time, interval: Duration): Seq[Time] = { + assert(that > this, "Cannot create sequence as " + that + " not more than " + this) + assert( + (that - this).isMultipleOf(interval), + "Cannot create sequence as gap between " + that + " and " + + this + " is not multiple of " + interval + ) + (this.milliseconds) until (that.milliseconds) by (interval.milliseconds) map (new Time(_)) + } + override def toString: String = (millis.toString + " ms") } diff --git a/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala index 10ccb4318d..41b9bd9461 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala @@ -21,19 +21,21 @@ class FileInputDStream[K: ClassManifest, V: ClassManifest, F <: NewInputFormat[K protected[streaming] override val checkpointData = new FileInputDStreamCheckpointData + // Latest file mod time seen till any point of time private val lastModTimeFiles = new HashSet[String]() private var lastModTime = 0L @transient private var path_ : Path = null @transient private var fs_ : FileSystem = null - @transient private var files = new HashMap[Time, Array[String]] + @transient private[streaming] var files = new HashMap[Time, Array[String]] override def start() { if (newFilesOnly) { - lastModTime = System.currentTimeMillis() + lastModTime = graph.zeroTime.milliseconds } else { lastModTime = 0 } + logDebug("LastModTime initialized to " + lastModTime + ", new files only = " + newFilesOnly) } override def stop() { } @@ -43,38 +45,50 @@ class FileInputDStream[K: ClassManifest, V: ClassManifest, F <: NewInputFormat[K * a union RDD out of them. Note that this maintains the list of files that were processed * in the latest modification time in the previous call to this method. This is because the * modification time returned by the FileStatus API seems to return times only at the - * granularity of seconds. Hence, new files may have the same modification time as the - * latest modification time in the previous call to this method and the list of files - * maintained is used to filter the one that have been processed. + * granularity of seconds. And new files may have the same modification time as the + * latest modification time in the previous call to this method yet was not reported in + * the previous call. 
*/ override def compute(validTime: Time): Option[RDD[(K, V)]] = { + assert(validTime.milliseconds >= lastModTime, "Trying to get new files for really old time [" + validTime + " < " + lastModTime) + // Create the filter for selecting new files val newFilter = new PathFilter() { + // Latest file mod time seen in this round of fetching files and its corresponding files var latestModTime = 0L val latestModTimeFiles = new HashSet[String]() def accept(path: Path): Boolean = { - if (!filter(path)) { + if (!filter(path)) { // Reject file if it does not satisfy filter + logDebug("Rejected by filter " + path) return false - } else { + } else { // Accept file only if val modTime = fs.getFileStatus(path).getModificationTime() - if (modTime < lastModTime){ - return false + logDebug("Mod time for " + path + " is " + modTime) + if (modTime < lastModTime) { + logDebug("Mod time less than last mod time") + return false // If the file was created before the last time it was called } else if (modTime == lastModTime && lastModTimeFiles.contains(path.toString)) { - return false + logDebug("Mod time equal to last mod time, but file considered already") + return false // If the file was created exactly as lastModTime but not reported yet + } else if (modTime > validTime.milliseconds) { + logDebug("Mod time more than valid time") + return false // If the file was created after the time this function call requires } if (modTime > latestModTime) { latestModTime = modTime latestModTimeFiles.clear() + logDebug("Latest mod time updated to " + latestModTime) } latestModTimeFiles += path.toString + logDebug("Accepted " + path) return true } } } - + logDebug("Finding new files at time " + validTime + " for last mod time = " + lastModTime) val newFiles = fs.listStatus(path, newFilter).map(_.getPath.toString) - logInfo("New files: " + newFiles.mkString(", ")) + logInfo("New files at time " + validTime + ":\n" + newFiles.mkString("\n")) if (newFiles.length > 0) { // Update the modification time and the files processed for that modification time if (lastModTime != newFilter.latestModTime) { @@ -82,17 +96,21 @@ class FileInputDStream[K: ClassManifest, V: ClassManifest, F <: NewInputFormat[K lastModTimeFiles.clear() } lastModTimeFiles ++= newFilter.latestModTimeFiles + logDebug("Last mod time updated to " + lastModTime) } files += ((validTime, newFiles)) Some(filesToRDD(newFiles)) } - /** Forget the old time-to-files mappings along with old RDDs */ - protected[streaming] override def forgetOldMetadata(time: Time) { - super.forgetOldMetadata(time) - val filesToBeRemoved = files.filter(_._1 <= (time - rememberDuration)) - files --= filesToBeRemoved.keys - logInfo("Forgot " + filesToBeRemoved.size + " files from " + this) + /** Clear the old time-to-files mappings along with old RDDs */ + protected[streaming] override def clearOldMetadata(time: Time) { + super.clearOldMetadata(time) + val oldFiles = files.filter(_._1 <= (time - rememberDuration)) + files --= oldFiles.keys + logInfo("Cleared " + oldFiles.size + " old files that were older than " + + (time - rememberDuration) + ": " + oldFiles.keys.mkString(", ")) + logDebug("Cleared files are:\n" + + oldFiles.map(p => (p._1, p._2.mkString(", "))).mkString("\n")) } /** Generate one RDD from an array of files */ @@ -148,6 +166,11 @@ class FileInputDStream[K: ClassManifest, V: ClassManifest, F <: NewInputFormat[K } } } + + override def toString() = { + "[\n" + hadoopFiles.size + " file sets\n" + + hadoopFiles.map(p => (p._1, p._2.mkString(", "))).mkString("\n") + "\n]" + } } } diff 
--git a/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala index 8c322dd698..ecc75ec913 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala @@ -46,8 +46,15 @@ abstract class NetworkInputDStream[T: ClassManifest](@transient ssc_ : Streaming def stop() {} override def compute(validTime: Time): Option[RDD[T]] = { - val blockIds = ssc.networkInputTracker.getBlockIds(id, validTime) - Some(new BlockRDD[T](ssc.sc, blockIds)) + // If this is called for any time before the start time of the context, + // then this returns an empty RDD. This may happen when recovering from a + // master failure forces + if (validTime >= graph.startTime) { + val blockIds = ssc.networkInputTracker.getBlockIds(id, validTime) + Some(new BlockRDD[T](ssc.sc, blockIds)) + } else { + Some(new BlockRDD[T](ssc.sc, Array[String]())) + } } } diff --git a/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala b/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala new file mode 100644 index 0000000000..3ffe4b64d0 --- /dev/null +++ b/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala @@ -0,0 +1,375 @@ +package spark.streaming.util + +import spark.{Logging, RDD} +import spark.streaming._ +import spark.streaming.dstream.ForEachDStream +import StreamingContext._ + +import scala.util.Random +import scala.collection.mutable.{SynchronizedBuffer, ArrayBuffer} + +import java.io.{File, ObjectInputStream, IOException} +import java.util.UUID + +import com.google.common.io.Files + +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.{FileUtil, FileSystem, Path} +import org.apache.hadoop.conf.Configuration + + +private[streaming] +object MasterFailureTest extends Logging { + initLogging() + + @volatile var killed = false + @volatile var killCount = 0 + + def main(args: Array[String]) { + if (args.size < 2) { + println( + "Usage: MasterFailureTest <# batches> []") + System.exit(1) + } + val directory = args(0) + val numBatches = args(1).toInt + val batchDuration = if (args.size > 2) Milliseconds(args(2).toInt) else Seconds(1) + + println("\n\n========================= MAP TEST =========================\n\n") + testMap(directory, numBatches, batchDuration) + + println("\n\n================= UPDATE-STATE-BY-KEY TEST =================\n\n") + testUpdateStateByKey(directory, numBatches, batchDuration) + } + + def testMap(directory: String, numBatches: Int, batchDuration: Duration) { + // Input: time=1 ==> [ 1 ] , time=2 ==> [ 2 ] , time=3 ==> [ 3 ] , ... + val input = (1 to numBatches).map(_.toString).toSeq + // Expected output: time=1 ==> [ 1 ] , time=2 ==> [ 2 ] , time=3 ==> [ 3 ] , ... 
+ val expectedOutput = (1 to numBatches) + + val operation = (st: DStream[String]) => st.map(_.toInt) + + // Run streaming operation with multiple master failures + val output = testOperation(directory, batchDuration, input, operation, expectedOutput) + + logInfo("Expected output, size = " + expectedOutput.size) + logInfo(expectedOutput.mkString("[", ",", "]")) + logInfo("Output, size = " + output.size) + logInfo(output.mkString("[", ",", "]")) + + // Verify whether all the values of the expected output is present + // in the output + assert(output.distinct.toSet == expectedOutput.toSet) + } + + + def testUpdateStateByKey(directory: String, numBatches: Int, batchDuration: Duration) { + // Input: time=1 ==> [ a ] , time=2 ==> [ a, a ] , time=3 ==> [ a, a, a ] , ... + val input = (1 to numBatches).map(i => (1 to i).map(_ => "a").mkString(" ")).toSeq + // Expected output: time=1 ==> [ (a, 1) ] , time=2 ==> [ (a, 3) ] , time=3 ==> [ (a,6) ] , ... + val expectedOutput = (1L to numBatches).map(i => (1L to i).reduce(_ + _)).map(j => ("a", j)) + + val operation = (st: DStream[String]) => { + val updateFunc = (values: Seq[Long], state: Option[Long]) => { + Some(values.foldLeft(0L)(_ + _) + state.getOrElse(0L)) + } + st.flatMap(_.split(" ")) + .map(x => (x, 1L)) + .updateStateByKey[Long](updateFunc) + .checkpoint(batchDuration * 5) + } + + // Run streaming operation with multiple master failures + val output = testOperation(directory, batchDuration, input, operation, expectedOutput) + + logInfo("Expected output, size = " + expectedOutput.size + "\n" + expectedOutput) + logInfo("Output, size = " + output.size + "\n" + output) + + // Verify whether all the values in the output are among the expected output values + output.foreach(o => + assert(expectedOutput.contains(o), "Expected value " + o + " not found") + ) + + // Verify whether the last expected output value has been generated, there by + // confirming that none of the inputs have been missed + assert(output.last == expectedOutput.last) + } + + /** + * Tests stream operation with multiple master failures, and verifies whether the + * final set of output values is as expected or not. + */ + def testOperation[T: ClassManifest]( + directory: String, + batchDuration: Duration, + input: Seq[String], + operation: DStream[String] => DStream[T], + expectedOutput: Seq[T] + ): Seq[T] = { + + // Just making sure that the expected output does not have duplicates + assert(expectedOutput.distinct.toSet == expectedOutput.toSet) + + // Setup the stream computation with the given operation + val (ssc, checkpointDir, testDir) = setupStreams(directory, batchDuration, operation) + + // Start generating files in the a different thread + val fileGeneratingThread = new FileGeneratingThread(input, testDir, batchDuration.milliseconds) + fileGeneratingThread.start() + + // Run the streams and repeatedly kill it until the last expected output + // has been generated, or until it has run for twice the expected time + val lastExpectedOutput = expectedOutput.last + val maxTimeToRun = expectedOutput.size * batchDuration.milliseconds * 2 + val mergedOutput = runStreams(ssc, lastExpectedOutput, maxTimeToRun) + + // Delete directories + fileGeneratingThread.join() + val fs = checkpointDir.getFileSystem(new Configuration()) + fs.delete(checkpointDir, true) + fs.delete(testDir, true) + logInfo("Finished test after " + killCount + " failures") + mergedOutput + } + + /** + * Sets up the stream computation with the given operation, directory (local or HDFS), + * and batch duration. 
Returns the streaming context and the directory to which + * files should be written for testing. + */ + private def setupStreams[T: ClassManifest]( + directory: String, + batchDuration: Duration, + operation: DStream[String] => DStream[T] + ): (StreamingContext, Path, Path) = { + // Reset all state + reset() + + // Create the directories for this test + val uuid = UUID.randomUUID().toString + val rootDir = new Path(directory, uuid) + val fs = rootDir.getFileSystem(new Configuration()) + val checkpointDir = new Path(rootDir, "checkpoint") + val testDir = new Path(rootDir, "test") + fs.mkdirs(checkpointDir) + fs.mkdirs(testDir) + + // Setup the streaming computation with the given operation + System.clearProperty("spark.driver.port") + var ssc = new StreamingContext("local[4]", "MasterFailureTest", batchDuration) + ssc.checkpoint(checkpointDir.toString) + val inputStream = ssc.textFileStream(testDir.toString) + val operatedStream = operation(inputStream) + val outputStream = new TestOutputStream(operatedStream) + ssc.registerOutputStream(outputStream) + (ssc, checkpointDir, testDir) + } + + + /** + * Repeatedly starts and kills the streaming context until timed out or + * the last expected output is generated. Finally, return + */ + private def runStreams[T: ClassManifest]( + ssc_ : StreamingContext, + lastExpectedOutput: T, + maxTimeToRun: Long + ): Seq[T] = { + + var ssc = ssc_ + var totalTimeRan = 0L + var isLastOutputGenerated = false + var isTimedOut = false + val mergedOutput = new ArrayBuffer[T]() + val checkpointDir = ssc.checkpointDir + var batchDuration = ssc.graph.batchDuration + + while(!isLastOutputGenerated && !isTimedOut) { + // Get the output buffer + val outputBuffer = ssc.graph.getOutputStreams.head.asInstanceOf[TestOutputStream[T]].output + def output = outputBuffer.flatMap(x => x) + + // Start the thread to kill the streaming after some time + killed = false + val killingThread = new KillingThread(ssc, batchDuration.milliseconds * 10) + killingThread.start() + + var timeRan = 0L + try { + // Start the streaming computation and let it run while ... 
+ // (i) StreamingContext has not been shut down yet + // (ii) The last expected output has not been generated yet + // (iii) Its not timed out yet + System.clearProperty("spark.streaming.clock") + System.clearProperty("spark.driver.port") + ssc.start() + val startTime = System.currentTimeMillis() + while (!killed && !isLastOutputGenerated && !isTimedOut) { + Thread.sleep(100) + timeRan = System.currentTimeMillis() - startTime + isLastOutputGenerated = (!output.isEmpty && output.last == lastExpectedOutput) + isTimedOut = (timeRan + totalTimeRan > maxTimeToRun) + } + } catch { + case e: Exception => logError("Error running streaming context", e) + } + if (killingThread.isAlive) killingThread.interrupt() + ssc.stop() + + logInfo("Has been killed = " + killed) + logInfo("Is last output generated = " + isLastOutputGenerated) + logInfo("Is timed out = " + isTimedOut) + + // Verify whether the output of each batch has only one element or no element + // and then merge the new output with all the earlier output + mergedOutput ++= output + totalTimeRan += timeRan + logInfo("New output = " + output) + logInfo("Merged output = " + mergedOutput) + logInfo("Time ran = " + timeRan) + logInfo("Total time ran = " + totalTimeRan) + + if (!isLastOutputGenerated && !isTimedOut) { + val sleepTime = Random.nextInt(batchDuration.milliseconds.toInt * 10) + logInfo( + "\n-------------------------------------------\n" + + " Restarting stream computation in " + sleepTime + " ms " + + "\n-------------------------------------------\n" + ) + Thread.sleep(sleepTime) + // Recreate the streaming context from checkpoint + ssc = new StreamingContext(checkpointDir) + } + } + mergedOutput + } + + /** + * Verifies the output value are the same as expected. Since failures can lead to + * a batch being processed twice, a batches output may appear more than once + * consecutively. To avoid getting confused with those, we eliminate consecutive + * duplicate batch outputs of values from the `output`. As a result, the + * expected output should not have consecutive batches with the same values as output. + */ + private def verifyOutput[T: ClassManifest](output: Seq[T], expectedOutput: Seq[T]) { + // Verify whether expected outputs do not consecutive batches with same output + for (i <- 0 until expectedOutput.size - 1) { + assert(expectedOutput(i) != expectedOutput(i+1), + "Expected output has consecutive duplicate sequence of values") + } + + // Log the output + println("Expected output, size = " + expectedOutput.size) + println(expectedOutput.mkString("[", ",", "]")) + println("Output, size = " + output.size) + println(output.mkString("[", ",", "]")) + + // Match the output with the expected output + output.foreach(o => + assert(expectedOutput.contains(o), "Expected value " + o + " not found") + ) + } + + /** Resets counter to prepare for the test */ + private def reset() { + killed = false + killCount = 0 + } +} + +/** + * This is a output stream just for testing. All the output is collected into a + * ArrayBuffer. This buffer is wiped clean on being restored from checkpoint. 
+ */ +private[streaming] +class TestOutputStream[T: ClassManifest]( + parent: DStream[T], + val output: ArrayBuffer[Seq[T]] = new ArrayBuffer[Seq[T]] with SynchronizedBuffer[Seq[T]] + ) extends ForEachDStream[T]( + parent, + (rdd: RDD[T], t: Time) => { + val collected = rdd.collect() + output += collected + println(t + ": " + collected.mkString("[", ",", "]")) + } + ) { + + // This is to clear the output buffer every it is read from a checkpoint + @throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream) { + ois.defaultReadObject() + output.clear() + } +} + + +/** + * Thread to kill streaming context after a random period of time. + */ +private[streaming] +class KillingThread(ssc: StreamingContext, maxKillWaitTime: Long) extends Thread with Logging { + initLogging() + + override def run() { + try { + // If it is the first killing, then allow the first checkpoint to be created + var minKillWaitTime = if (MasterFailureTest.killCount == 0) 5000 else 1000 + val killWaitTime = minKillWaitTime + math.abs(Random.nextLong % maxKillWaitTime) + logInfo("Kill wait time = " + killWaitTime) + Thread.sleep(killWaitTime) + logInfo( + "\n---------------------------------------\n" + + "Killing streaming context after " + killWaitTime + " ms" + + "\n---------------------------------------\n" + ) + if (ssc != null) { + ssc.stop() + MasterFailureTest.killed = true + MasterFailureTest.killCount += 1 + } + logInfo("Killing thread finished normally") + } catch { + case ie: InterruptedException => logInfo("Killing thread interrupted") + case e: Exception => logWarning("Exception in killing thread", e) + } + + } +} + + +/** + * Thread to generate input files periodically with the desired text. + */ +private[streaming] +class FileGeneratingThread(input: Seq[String], testDir: Path, interval: Long) + extends Thread with Logging { + initLogging() + + override def run() { + val localTestDir = Files.createTempDir() + val fs = testDir.getFileSystem(new Configuration()) + try { + Thread.sleep(5000) // To make sure that all the streaming context has been set up + for (i <- 0 until input.size) { + // Write the data to a local file and then move it to the target test directory + val localFile = new File(localTestDir, (i+1).toString) + val hadoopFile = new Path(testDir, (i+1).toString) + FileUtils.writeStringToFile(localFile, input(i).toString + "\n") + //fs.moveFromLocalFile(new Path(localFile.toString), new Path(testDir, i.toString)) + fs.copyFromLocalFile(new Path(localFile.toString), hadoopFile) + logInfo("Generated file " + hadoopFile + " at " + System.currentTimeMillis) + Thread.sleep(interval) + localFile.delete() + } + logInfo("File generating thread finished normally") + } catch { + case ie: InterruptedException => logInfo("File generating thread interrupted") + case e: Exception => logWarning("File generating in killing thread", e) + } finally { + fs.close() + } + } +} + + diff --git a/streaming/src/main/scala/spark/streaming/util/RecurringTimer.scala b/streaming/src/main/scala/spark/streaming/util/RecurringTimer.scala index db715cc295..8e10276deb 100644 --- a/streaming/src/main/scala/spark/streaming/util/RecurringTimer.scala +++ b/streaming/src/main/scala/spark/streaming/util/RecurringTimer.scala @@ -3,9 +3,9 @@ package spark.streaming.util private[streaming] class RecurringTimer(val clock: Clock, val period: Long, val callback: (Long) => Unit) { - val minPollTime = 25L + private val minPollTime = 25L - val pollTime = { + private val pollTime = { if (period / 10.0 > minPollTime) { (period / 
10.0).toLong } else { @@ -13,11 +13,20 @@ class RecurringTimer(val clock: Clock, val period: Long, val callback: (Long) => } } - val thread = new Thread() { + private val thread = new Thread() { override def run() { loop } } - var nextTime = 0L + private var nextTime = 0L + + def getStartTime(): Long = { + (math.floor(clock.currentTime.toDouble / period) + 1).toLong * period + } + + def getRestartTime(originalStartTime: Long): Long = { + val gap = clock.currentTime - originalStartTime + (math.floor(gap.toDouble / period).toLong + 1) * period + originalStartTime + } def start(startTime: Long): Long = { nextTime = startTime @@ -26,21 +35,14 @@ class RecurringTimer(val clock: Clock, val period: Long, val callback: (Long) => } def start(): Long = { - val startTime = (math.floor(clock.currentTime.toDouble / period) + 1).toLong * period - start(startTime) + start(getStartTime()) } - def restart(originalStartTime: Long): Long = { - val gap = clock.currentTime - originalStartTime - val newStartTime = (math.floor(gap.toDouble / period).toLong + 1) * period + originalStartTime - start(newStartTime) - } - - def stop() { + def stop() { thread.interrupt() } - def loop() { + private def loop() { try { while (true) { clock.waitTillTime(nextTime) diff --git a/streaming/src/test/java/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/spark/streaming/JavaAPISuite.java index fbe4af4597..783a393a8f 100644 --- a/streaming/src/test/java/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/spark/streaming/JavaAPISuite.java @@ -33,7 +33,8 @@ public class JavaAPISuite implements Serializable { @Before public void setUp() { - ssc = new JavaStreamingContext("local[2]", "test", new Duration(1000)); + System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock"); + ssc = new JavaStreamingContext("local[2]", "test", new Duration(1000)); ssc.checkpoint("checkpoint", new Duration(1000)); } @@ -45,7 +46,7 @@ public class JavaAPISuite implements Serializable { // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown System.clearProperty("spark.driver.port"); } - /* + @Test public void testCount() { List> inputData = Arrays.asList( @@ -434,7 +435,7 @@ public class JavaAPISuite implements Serializable { assertOrderInvariantEquals(expected, result); } - */ + /* * Performs an order-invariant comparison of lists representing two RDD streams. This allows * us to account for ordering variation within individual RDD's which occurs during windowing. @@ -450,7 +451,7 @@ public class JavaAPISuite implements Serializable { Assert.assertEquals(expected, actual); } - /* + // PairDStream Functions @Test public void testPairFilter() { @@ -897,7 +898,7 @@ public class JavaAPISuite implements Serializable { Assert.assertEquals(expected, result); } - */ + @Test public void testCheckpointMasterRecovery() throws InterruptedException { List> inputData = Arrays.asList( @@ -964,7 +965,7 @@ public class JavaAPISuite implements Serializable { assertOrderInvariantEquals(expected, result1); } */ - /* + // Input stream tests. These mostly just test that we can instantiate a given InputStream with // Java arguments and assign it to a JavaDStream without producing type errors. Testing of the // InputStream functionality is deferred to the existing Scala tests. 
@@ -972,9 +973,9 @@ public class JavaAPISuite implements Serializable { public void testKafkaStream() { HashMap topics = Maps.newHashMap(); HashMap offsets = Maps.newHashMap(); - JavaDStream test1 = ssc.kafkaStream("localhost", 12345, "group", topics); - JavaDStream test2 = ssc.kafkaStream("localhost", 12345, "group", topics, offsets); - JavaDStream test3 = ssc.kafkaStream("localhost", 12345, "group", topics, offsets, + JavaDStream test1 = ssc.kafkaStream("localhost:12345", "group", topics); + JavaDStream test2 = ssc.kafkaStream("localhost:12345", "group", topics, offsets); + JavaDStream test3 = ssc.kafkaStream("localhost:12345", "group", topics, offsets, StorageLevel.MEMORY_AND_DISK()); } @@ -1026,5 +1027,5 @@ public class JavaAPISuite implements Serializable { public void testFileStream() { JavaPairDStream foo = ssc.fileStream("/tmp/foo"); - }*/ + } } diff --git a/streaming/src/test/resources/log4j.properties b/streaming/src/test/resources/log4j.properties index edfa1243fa..5652596e1e 100644 --- a/streaming/src/test/resources/log4j.properties +++ b/streaming/src/test/resources/log4j.properties @@ -1,6 +1,7 @@ # Set everything to be logged to the file streaming/target/unit-tests.log -log4j.rootCategory=INFO, file -log4j.appender.file=org.apache.log4j.FileAppender +log4j.rootCategory=WARN, file +# log4j.appender.file=org.apache.log4j.FileAppender +log4j.appender.file=org.apache.log4j.ConsoleAppender log4j.appender.file.append=false log4j.appender.file.file=streaming/target/unit-tests.log log4j.appender.file.layout=org.apache.log4j.PatternLayout @@ -8,4 +9,6 @@ log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: # Ignore messages below warning level from Jetty, because it's a bit verbose log4j.logger.org.eclipse.jetty=WARN +log4j.logger.spark.streaming=INFO +log4j.logger.spark.streaming.dstream.FileInputDStream=DEBUG diff --git a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala b/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala index c031949dd1..12388b8887 100644 --- a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala @@ -6,6 +6,8 @@ import util.ManualClock class BasicOperationsSuite extends TestSuiteBase { + System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock") + override def framework() = "BasicOperationsSuite" after { diff --git a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala index 7126af62d9..c89c4a8d43 100644 --- a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala +++ b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala @@ -1,5 +1,6 @@ package spark.streaming +import dstream.FileInputDStream import spark.streaming.StreamingContext._ import java.io.File import runtime.RichInt @@ -10,8 +11,16 @@ import util.{Clock, ManualClock} import scala.util.Random import com.google.common.io.Files + +/** + * This test suites tests the checkpointing functionality of DStreams - + * the checkpointing of a DStream's RDDs as well as the checkpointing of + * the whole DStream graph. 
+ */ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { + System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock") + before { FileUtils.deleteDirectory(new File(checkpointDir)) } @@ -64,7 +73,7 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { // Run till a time such that at least one RDD in the stream should have been checkpointed, // then check whether some RDD has been checkpointed or not ssc.start() - runStreamsWithRealDelay(ssc, firstNumBatches) + advanceTimeWithRealDelay(ssc, firstNumBatches) logInfo("Checkpoint data of state stream = \n" + stateStream.checkpointData) assert(!stateStream.checkpointData.checkpointFiles.isEmpty, "No checkpointed RDDs in state stream before first failure") stateStream.checkpointData.checkpointFiles.foreach { @@ -77,7 +86,7 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { // Run till a further time such that previous checkpoint files in the stream would be deleted // and check whether the earlier checkpoint files are deleted val checkpointFiles = stateStream.checkpointData.checkpointFiles.map(x => new File(x._2)) - runStreamsWithRealDelay(ssc, secondNumBatches) + advanceTimeWithRealDelay(ssc, secondNumBatches) checkpointFiles.foreach(file => assert(!file.exists, "Checkpoint file '" + file + "' was not deleted")) ssc.stop() @@ -92,7 +101,7 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { // Run one batch to generate a new checkpoint file and check whether some RDD // is present in the checkpoint data or not ssc.start() - runStreamsWithRealDelay(ssc, 1) + advanceTimeWithRealDelay(ssc, 1) assert(!stateStream.checkpointData.checkpointFiles.isEmpty, "No checkpointed RDDs in state stream before second failure") stateStream.checkpointData.checkpointFiles.foreach { case (time, data) => { @@ -113,7 +122,7 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { // Adjust manual clock time as if it is being restarted after a delay System.setProperty("spark.streaming.manualClock.jump", (batchDuration.milliseconds * 7).toString) ssc.start() - runStreamsWithRealDelay(ssc, 4) + advanceTimeWithRealDelay(ssc, 4) ssc.stop() System.clearProperty("spark.streaming.manualClock.jump") ssc = null @@ -168,74 +177,95 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { } // This tests whether file input stream remembers what files were seen before - // the master failure and uses them again to process a large window operatoin. + // the master failure and uses them again to process a large window operation. // It also tests whether batches, whose processing was incomplete due to the // failure, are re-processed or not. 
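The test below exercises the driver-recovery path end to end: files are fed through textFileStream, the context is stopped to simulate a failure, and a new StreamingContext is rebuilt from the checkpoint directory. A minimal sketch of that recovery pattern outside the test harness is shown here; the directory paths and application name are illustrative assumptions, while the calls themselves (two-argument checkpoint, textFileStream, reduceByWindow, the checkpoint-directory constructor) are the ones used in the test:

// Sketch of the checkpoint-and-recover pattern the test below exercises.
// Directory paths and the application name are illustrative assumptions.
import spark.streaming.{Seconds, StreamingContext}

object CheckpointRecoverySketch {
  def main(args: Array[String]) {
    // First run: build the graph and enable checkpointing.
    val ssc = new StreamingContext("local[2]", "CheckpointRecoverySketch", Seconds(1))
    ssc.checkpoint("/tmp/sketch-checkpoint", Seconds(1))
    val sums = ssc.textFileStream("/tmp/sketch-input")
      .map(_.toInt)
      .reduceByWindow(_ + _, Seconds(30), Seconds(1))
    sums.print()
    ssc.start()

    // After a driver failure, the same graph (including the set of files
    // already seen by the FileInputDStream) is rebuilt from the checkpoint:
    //   val recovered = new StreamingContext("/tmp/sketch-checkpoint")
    //   recovered.start()
  }
}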
test("recovery with file input stream") { + // Disable manual clock as FileInputDStream does not work with manual clock + val clockProperty = System.getProperty("spark.streaming.clock") + System.clearProperty("spark.streaming.clock") + // Set up the streaming context and input streams val testDir = Files.createTempDir() - var ssc = new StreamingContext(master, framework, batchDuration) + var ssc = new StreamingContext(master, framework, Seconds(1)) ssc.checkpoint(checkpointDir, checkpointInterval) val fileStream = ssc.textFileStream(testDir.toString) // Making value 3 take large time to process, to ensure that the master // shuts down in the middle of processing the 3rd batch val mappedStream = fileStream.map(s => { val i = s.toInt - if (i == 3) Thread.sleep(1000) + if (i == 3) Thread.sleep(2000) i }) + // Reducing over a large window to ensure that recovery from master failure // requires reprocessing of all the files seen before the failure - val reducedStream = mappedStream.reduceByWindow(_ + _, batchDuration * 30, batchDuration) + val reducedStream = mappedStream.reduceByWindow(_ + _, Seconds(30), Seconds(1)) val outputBuffer = new ArrayBuffer[Seq[Int]] var outputStream = new TestOutputStream(reducedStream, outputBuffer) ssc.registerOutputStream(outputStream) ssc.start() // Create files and advance manual clock to process them - var clock = ssc.scheduler.clock.asInstanceOf[ManualClock] + //var clock = ssc.scheduler.clock.asInstanceOf[ManualClock] Thread.sleep(1000) for (i <- Seq(1, 2, 3)) { FileUtils.writeStringToFile(new File(testDir, i.toString), i.toString + "\n") // wait to make sure that the file is written such that it gets shown in the file listings - Thread.sleep(500) - clock.addToTime(batchDuration.milliseconds) - // wait to make sure that FileInputDStream picks up this file only and not any other file - Thread.sleep(500) + Thread.sleep(1000) } logInfo("Output = " + outputStream.output.mkString(",")) assert(outputStream.output.size > 0, "No files processed before restart") ssc.stop() + // Verify whether files created have been recorded correctly or not + var fileInputDStream = ssc.graph.getInputStreams().head.asInstanceOf[FileInputDStream[_, _, _]] + def recordedFiles = fileInputDStream.files.values.flatMap(x => x) + assert(!recordedFiles.filter(_.endsWith("1")).isEmpty) + assert(!recordedFiles.filter(_.endsWith("2")).isEmpty) + assert(!recordedFiles.filter(_.endsWith("3")).isEmpty) + // Create files while the master is down for (i <- Seq(4, 5, 6)) { FileUtils.writeStringToFile(new File(testDir, i.toString), i.toString + "\n") Thread.sleep(1000) } - // Restart stream computation from checkpoint and create more files to see whether - // they are being processed + // Recover context from checkpoint file and verify whether the files that were + // recorded before failure were saved and successfully recovered logInfo("*********** RESTARTING ************") ssc = new StreamingContext(checkpointDir) + fileInputDStream = ssc.graph.getInputStreams().head.asInstanceOf[FileInputDStream[_, _, _]] + assert(!recordedFiles.filter(_.endsWith("1")).isEmpty) + assert(!recordedFiles.filter(_.endsWith("2")).isEmpty) + assert(!recordedFiles.filter(_.endsWith("3")).isEmpty) + + // Restart stream computation ssc.start() - clock = ssc.scheduler.clock.asInstanceOf[ManualClock] for (i <- Seq(7, 8, 9)) { FileUtils.writeStringToFile(new File(testDir, i.toString), i.toString + "\n") - Thread.sleep(500) - clock.addToTime(batchDuration.milliseconds) - Thread.sleep(500) + Thread.sleep(1000) } 
Thread.sleep(1000) - logInfo("Output = " + outputStream.output.mkString(",")) + logInfo("Output = " + outputStream.output.mkString("[", ", ", "]")) assert(outputStream.output.size > 0, "No files processed after restart") ssc.stop() + // Verify whether files created while the driver was down have been recorded or not + assert(!recordedFiles.filter(_.endsWith("4")).isEmpty) + assert(!recordedFiles.filter(_.endsWith("5")).isEmpty) + assert(!recordedFiles.filter(_.endsWith("6")).isEmpty) + + // Verify whether new files created after recover have been recorded or not + assert(!recordedFiles.filter(_.endsWith("7")).isEmpty) + assert(!recordedFiles.filter(_.endsWith("8")).isEmpty) + assert(!recordedFiles.filter(_.endsWith("9")).isEmpty) + // Append the new output to the old buffer outputStream = ssc.graph.getOutputStreams().head.asInstanceOf[TestOutputStream[Int]] outputBuffer ++= outputStream.output - // Verify whether data received by Spark Streaming was as expected - val expectedOutput = Seq(1, 3, 6, 28, 36, 45) + val expectedOutput = Seq(1, 3, 6, 10, 15, 21, 28, 36, 45) logInfo("--------------------------------") logInfo("output, size = " + outputBuffer.size) outputBuffer.foreach(x => logInfo("[" + x.mkString(",") + "]")) @@ -244,11 +274,17 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { logInfo("--------------------------------") // Verify whether all the elements received are as expected - assert(outputBuffer.size === expectedOutput.size) - for (i <- 0 until outputBuffer.size) { - assert(outputBuffer(i).size === 1) - assert(outputBuffer(i).head === expectedOutput(i)) - } + val output = outputBuffer.flatMap(x => x) + assert(output.contains(6)) // To ensure that the 3rd input (i.e., 3) was processed + output.foreach(o => // To ensure all the inputs are correctly added cumulatively + assert(expectedOutput.contains(o), "Expected value " + o + " not found") + ) + // To ensure that all the inputs were received correctly + assert(expectedOutput.last === output.last) + + // Enable manual clock back again for other tests + if (clockProperty != null) + System.setProperty("spark.streaming.clock", clockProperty) } @@ -278,7 +314,9 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { // Do the computation for initial number of batches, create checkpoint file and quit ssc = setupStreams[U, V](input, operation) - val output = runStreams[V](ssc, initialNumBatches, initialNumExpectedOutputs) + ssc.start() + val output = advanceTimeWithRealDelay[V](ssc, initialNumBatches) + ssc.stop() verifyOutput[V](output, expectedOutput.take(initialNumBatches), true) Thread.sleep(1000) @@ -289,17 +327,20 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { "\n-------------------------------------------\n" ) ssc = new StreamingContext(checkpointDir) - val outputNew = runStreams[V](ssc, nextNumBatches, nextNumExpectedOutputs) + System.clearProperty("spark.driver.port") + ssc.start() + val outputNew = advanceTimeWithRealDelay[V](ssc, nextNumBatches) // the first element will be re-processed data of the last batch before restart verifyOutput[V](outputNew, expectedOutput.takeRight(nextNumExpectedOutputs), true) + ssc.stop() ssc = null } /** * Advances the manual clock on the streaming scheduler by given number of batches. - * It also wait for the expected amount of time for each batch. + * It also waits for the expected amount of time for each batch. 
*/ - def runStreamsWithRealDelay(ssc: StreamingContext, numBatches: Long) { + def advanceTimeWithRealDelay[V: ClassManifest](ssc: StreamingContext, numBatches: Long): Seq[Seq[V]] = { val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] logInfo("Manual clock before advancing = " + clock.time) for (i <- 1 to numBatches.toInt) { @@ -308,6 +349,8 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { } logInfo("Manual clock after advancing = " + clock.time) Thread.sleep(batchDuration.milliseconds) - } + val outputStream = ssc.graph.getOutputStreams.head.asInstanceOf[TestOutputStream[V]] + outputStream.output + } } \ No newline at end of file diff --git a/streaming/src/test/scala/spark/streaming/FailureSuite.scala b/streaming/src/test/scala/spark/streaming/FailureSuite.scala index efaa098d2e..a5fa7ab92d 100644 --- a/streaming/src/test/scala/spark/streaming/FailureSuite.scala +++ b/streaming/src/test/scala/spark/streaming/FailureSuite.scala @@ -1,14 +1,15 @@ package spark.streaming -import org.scalatest.{FunSuite, BeforeAndAfter} -import org.apache.commons.io.FileUtils -import java.io.File -import scala.runtime.RichInt -import scala.util.Random -import spark.streaming.StreamingContext._ -import collection.mutable.{SynchronizedBuffer, ArrayBuffer} import spark.Logging +import spark.streaming.util.MasterFailureTest +import StreamingContext._ + +import org.scalatest.{FunSuite, BeforeAndAfter} import com.google.common.io.Files +import java.io.File +import org.apache.commons.io.FileUtils +import collection.mutable.ArrayBuffer + /** * This testsuite tests master failures at random times while the stream is running using @@ -16,295 +17,24 @@ import com.google.common.io.Files */ class FailureSuite extends FunSuite with BeforeAndAfter with Logging { - var testDir: File = null - var checkpointDir: File = null - val batchDuration = Milliseconds(500) + var directory = "FailureSuite" + val numBatches = 30 + val batchDuration = Milliseconds(1000) before { - testDir = Files.createTempDir() - checkpointDir = Files.createTempDir() + FileUtils.deleteDirectory(new File(directory)) } after { - FailureSuite.reset() - FileUtils.deleteDirectory(checkpointDir) - FileUtils.deleteDirectory(testDir) + FileUtils.deleteDirectory(new File(directory)) + } - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.driver.port") + test("multiple failures with map") { + MasterFailureTest.testMap(directory, numBatches, batchDuration) } test("multiple failures with updateStateByKey") { - val n = 30 - // Input: time=1 ==> [ a ] , time=2 ==> [ a, a ] , time=3 ==> [ a, a, a ] , ... - val input = (1 to n).map(i => (1 to i).map(_ => "a").mkString(" ")).toSeq - // Expected output: time=1 ==> [ (a, 1) ] , time=2 ==> [ (a, 3) ] , time=3 ==> [ (a,6) ] , ... 
- val expectedOutput = (1 to n).map(i => (1 to i).reduce(_ + _)).map(j => ("a", j)) - - val operation = (st: DStream[String]) => { - val updateFunc = (values: Seq[Int], state: Option[RichInt]) => { - Some(new RichInt(values.foldLeft(0)(_ + _) + state.map(_.self).getOrElse(0))) - } - st.flatMap(_.split(" ")) - .map(x => (x, 1)) - .updateStateByKey[RichInt](updateFunc) - .checkpoint(Seconds(2)) - .map(t => (t._1, t._2.self)) - } - - testOperationWithMultipleFailures(input, operation, expectedOutput) - } - - test("multiple failures with reduceByKeyAndWindow") { - val n = 30 - val w = 100 - assert(w > n, "Window should be much larger than the number of input sets in this test") - // Input: time=1 ==> [ a ] , time=2 ==> [ a, a ] , time=3 ==> [ a, a, a ] , ... - val input = (1 to n).map(i => (1 to i).map(_ => "a").mkString(" ")).toSeq - // Expected output: time=1 ==> [ (a, 1) ] , time=2 ==> [ (a, 3) ] , time=3 ==> [ (a,6) ] , ... - val expectedOutput = (1 to n).map(i => (1 to i).reduce(_ + _)).map(j => ("a", j)) - - val operation = (st: DStream[String]) => { - st.flatMap(_.split(" ")) - .map(x => (x, 1)) - .reduceByKeyAndWindow(_ + _, _ - _, batchDuration * w, batchDuration) - .checkpoint(Seconds(2)) - } - - testOperationWithMultipleFailures(input, operation, expectedOutput) - } - - - /** - * Tests stream operation with multiple master failures, and verifies whether the - * final set of output values is as expected or not. Checking the final value is - * proof that no intermediate data was lost due to master failures. - */ - def testOperationWithMultipleFailures( - input: Seq[String], - operation: DStream[String] => DStream[(String, Int)], - expectedOutput: Seq[(String, Int)] - ) { - var ssc = setupStreamsWithFileStream(operation) - - val mergedOutput = new ArrayBuffer[(String, Int)]() - val lastExpectedOutput = expectedOutput.last - - val maxTimeToRun = expectedOutput.size * batchDuration.milliseconds * 2 - var totalTimeRan = 0L - - // Start generating files in the a different thread - val fileGeneratingThread = new FileGeneratingThread(input, testDir.getPath, batchDuration.milliseconds) - fileGeneratingThread.start() - - // Repeatedly start and kill the streaming context until timed out or - // all expected output is generated - while(!FailureSuite.outputGenerated && !FailureSuite.timedOut) { - - // Start the thread to kill the streaming after some time - FailureSuite.failed = false - val killingThread = new KillingThread(ssc, batchDuration.milliseconds * 10) - killingThread.start() - - // Run the streams with real clock until last expected output is seen or timed out - val (output, timeRan) = runStreamsWithRealClock(ssc, lastExpectedOutput, maxTimeToRun - totalTimeRan) - if (killingThread.isAlive) killingThread.interrupt() - - // Merge output and time ran and see whether already timed out or not - mergedOutput ++= output - totalTimeRan += timeRan - logInfo("New output = " + output) - logInfo("Merged output = " + mergedOutput) - logInfo("Total time spent = " + totalTimeRan) - if (totalTimeRan > maxTimeToRun) { - FailureSuite.timedOut = true - } - - if (!FailureSuite.outputGenerated && !FailureSuite.timedOut) { - val sleepTime = Random.nextInt(batchDuration.milliseconds.toInt * 2) - logInfo( - "\n-------------------------------------------\n" + - " Restarting stream computation in " + sleepTime + " ms " + - "\n-------------------------------------------\n" - ) - Thread.sleep(sleepTime) - } - - // Recreate the streaming context from checkpoint - ssc = new StreamingContext(checkpointDir.getPath) 
- } - ssc.stop() - ssc = null - logInfo("Finished test after " + FailureSuite.failureCount + " failures") - - if (FailureSuite.timedOut) { - logWarning("Timed out with run time of "+ maxTimeToRun + " ms for " + - expectedOutput.size + " batches of " + batchDuration) - } - - // Verify whether the output is as expected - verifyOutput(mergedOutput, expectedOutput) - if (fileGeneratingThread.isAlive) fileGeneratingThread.interrupt() - } - - /** Sets up the stream operations with file input stream */ - def setupStreamsWithFileStream( - operation: DStream[String] => DStream[(String, Int)] - ): StreamingContext = { - val ssc = new StreamingContext("local[4]", "FailureSuite", batchDuration) - ssc.checkpoint(checkpointDir.getPath) - val inputStream = ssc.textFileStream(testDir.getPath) - val operatedStream = operation(inputStream) - val outputBuffer = new ArrayBuffer[Seq[(String, Int)]] with SynchronizedBuffer[Seq[(String, Int)]] - val outputStream = new TestOutputStream(operatedStream, outputBuffer) - ssc.registerOutputStream(outputStream) - ssc - } - - /** - * Runs the streams set up in `ssc` on real clock. - */ - def runStreamsWithRealClock( - ssc: StreamingContext, - lastExpectedOutput: (String, Int), - timeout: Long - ): (Seq[(String, Int)], Long) = { - - System.clearProperty("spark.streaming.clock") - - // Get the output buffer - val outputStream = ssc.graph.getOutputStreams.head.asInstanceOf[TestOutputStream[(String, Int)]] - val output = outputStream.output - val startTime = System.currentTimeMillis() - - // Functions to detect various conditions - def hasFailed = FailureSuite.failed - def isLastOutputGenerated = !output.flatMap(x => x).isEmpty && output(output.lastIndexWhere(!_.isEmpty)).head == lastExpectedOutput - def isTimedOut = System.currentTimeMillis() - startTime > timeout - - // Start the streaming computation and let it run while ... - // (i) StreamingContext has not been shut down yet - // (ii) The last expected output has not been generated yet - // (iii) Its not timed out yet - try { - ssc.start() - while (!hasFailed && !isLastOutputGenerated && !isTimedOut) { - Thread.sleep(100) - } - logInfo("Has failed = " + hasFailed) - logInfo("Is last output generated = " + isLastOutputGenerated) - logInfo("Is timed out = " + isTimedOut) - } catch { - case e: Exception => logInfo("Exception while running streams: " + e) - } finally { - ssc.stop() - } - - // Verify whether the output of each batch has only one element - assert(output.forall(_.size <= 1), "output of each batch should have only one element") - - // Set appropriate flags is timed out or output has been generated - if (isTimedOut) FailureSuite.timedOut = true - if (isLastOutputGenerated) FailureSuite.outputGenerated = true - - val timeTaken = System.currentTimeMillis() - startTime - logInfo("" + output.size + " sets of output generated in " + timeTaken + " ms") - (output.flatMap(_.headOption), timeTaken) - } - - /** - * Verifies the output value are the same as expected. Since failures can lead to - * a batch being processed twice, a batches output may appear more than once - * consecutively. To avoid getting confused with those, we eliminate consecutive - * duplicate batch outputs of values from the `output`. As a result, the - * expected output should not have consecutive batches with the same values as output. 
- */ - def verifyOutput(output: Seq[(String, Int)], expectedOutput: Seq[(String, Int)]) { - // Verify whether expected outputs do not consecutive batches with same output - for (i <- 0 until expectedOutput.size - 1) { - assert(expectedOutput(i) != expectedOutput(i+1), - "Expected output has consecutive duplicate sequence of values") - } - - // Match the output with the expected output - logInfo( - "\n-------------------------------------------\n" + - " Verifying output " + - "\n-------------------------------------------\n" - ) - logInfo("Expected output, size = " + expectedOutput.size) - logInfo(expectedOutput.mkString("[", ",", "]")) - logInfo("Output, size = " + output.size) - logInfo(output.mkString("[", ",", "]")) - output.foreach(o => - assert(expectedOutput.contains(o), "Expected value " + o + " not found") - ) - } -} - -object FailureSuite { - var failed = false - var outputGenerated = false - var timedOut = false - var failureCount = 0 - - def reset() { - failed = false - outputGenerated = false - timedOut = false - failureCount = 0 - } -} - -/** - * Thread to kill streaming context after some time. - */ -class KillingThread(ssc: StreamingContext, maxKillWaitTime: Long) extends Thread with Logging { - initLogging() - - override def run() { - try { - var minKillWaitTime = if (FailureSuite.failureCount == 0) 5000 else 1000 // to allow the first checkpoint - val killWaitTime = minKillWaitTime + math.abs(Random.nextLong % maxKillWaitTime) - logInfo("Kill wait time = " + killWaitTime) - Thread.sleep(killWaitTime) - logInfo( - "\n---------------------------------------\n" + - "Killing streaming context after " + killWaitTime + " ms" + - "\n---------------------------------------\n" - ) - if (ssc != null) { - ssc.stop() - FailureSuite.failed = true - FailureSuite.failureCount += 1 - } - logInfo("Killing thread exited") - } catch { - case ie: InterruptedException => logInfo("Killing thread interrupted") - case e: Exception => logWarning("Exception in killing thread", e) - } - } -} - -/** - * Thread to generate input files periodically with the desired text - */ -class FileGeneratingThread(input: Seq[String], testDir: String, interval: Long) - extends Thread with Logging { - initLogging() - - override def run() { - try { - Thread.sleep(5000) // To make sure that all the streaming context has been set up - for (i <- 0 until input.size) { - FileUtils.writeStringToFile(new File(testDir, i.toString), input(i).toString + "\n") - Thread.sleep(interval) - } - logInfo("File generating thread exited") - } catch { - case ie: InterruptedException => logInfo("File generating thread interrupted") - case e: Exception => logWarning("File generating in killing thread", e) - } + MasterFailureTest.testUpdateStateByKey(directory, numBatches, batchDuration) } } diff --git a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala index 0eb9c7b81e..7c1c2e1040 100644 --- a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala @@ -133,26 +133,29 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { test("file input stream") { + // Disable manual clock as FileInputDStream does not work with manual clock + System.clearProperty("spark.streaming.clock") + // Set up the streaming context and input streams val testDir = Files.createTempDir() val ssc = new StreamingContext(master, framework, batchDuration) - val filestream = 
ssc.textFileStream(testDir.toString) + val fileStream = ssc.textFileStream(testDir.toString) val outputBuffer = new ArrayBuffer[Seq[String]] with SynchronizedBuffer[Seq[String]] def output = outputBuffer.flatMap(x => x) - val outputStream = new TestOutputStream(filestream, outputBuffer) + val outputStream = new TestOutputStream(fileStream, outputBuffer) ssc.registerOutputStream(outputStream) ssc.start() // Create files in the temporary directory so that Spark Streaming can read data from it - val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] val input = Seq(1, 2, 3, 4, 5) val expectedOutput = input.map(_.toString) Thread.sleep(1000) for (i <- 0 until input.size) { - FileUtils.writeStringToFile(new File(testDir, i.toString), input(i).toString + "\n") - Thread.sleep(500) - clock.addToTime(batchDuration.milliseconds) - //Thread.sleep(100) + val file = new File(testDir, i.toString) + FileUtils.writeStringToFile(file, input(i).toString + "\n") + logInfo("Created file " + file) + Thread.sleep(batchDuration.milliseconds) + Thread.sleep(1000) } val startTime = System.currentTimeMillis() Thread.sleep(1000) @@ -171,16 +174,16 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { // Verify whether all the elements received are as expected // (whether the elements were received one in each interval is not verified) - assert(output.size === expectedOutput.size) - for (i <- 0 until output.size) { - assert(output(i).size === 1) - assert(output(i).head.toString === expectedOutput(i)) - } + assert(output.toList === expectedOutput.toList) + FileUtils.deleteDirectory(testDir) + + // Enable manual clock back again for other tests + System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock") } } - +/** This is server to test the network input stream */ class TestServer(port: Int) extends Logging { val queue = new ArrayBlockingQueue[String](100) diff --git a/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala index c2733831b2..2cc31d6137 100644 --- a/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala +++ b/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala @@ -63,20 +63,28 @@ class TestOutputStream[T: ClassManifest](parent: DStream[T], val output: ArrayBu */ trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { + // Name of the framework for Spark context def framework = "TestSuiteBase" + // Master for Spark context def master = "local[2]" + // Batch duration def batchDuration = Seconds(1) + // Directory where the checkpoint data will be saved def checkpointDir = "checkpoint" + // Duration after which the graph is checkpointed def checkpointInterval = batchDuration + // Number of partitions of the input parallel collections created for testing def numInputPartitions = 2 + // Maximum time to wait before the test times out def maxWaitTimeMillis = 10000 + // Whether to actually wait in real time before changing manual clock def actuallyWait = false /** @@ -140,9 +148,6 @@ trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { numBatches: Int, numExpectedOutput: Int ): Seq[Seq[V]] = { - - System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock") - assert(numBatches > 0, "Number of batches to run stream computation is zero") assert(numExpectedOutput > 0, "Number of expected outputs after " + numBatches + " is zero") logInfo("numBatches = " + numBatches + ", numExpectedOutput = " + numExpectedOutput) @@ -186,7 +191,6 @@ trait 
TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { } finally { ssc.stop() } - output } diff --git a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala b/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala index cd9608df53..1080790147 100644 --- a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala @@ -5,6 +5,8 @@ import collection.mutable.ArrayBuffer class WindowOperationsSuite extends TestSuiteBase { + System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock") + override def framework = "WindowOperationsSuite" override def maxWaitTimeMillis = 20000 From 12b020b6689b8db94df904d9b897a43bce18c971 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Wed, 13 Feb 2013 20:53:50 -0800 Subject: [PATCH 284/291] Added filter functionality to reduceByKeyAndWindow with inverse. Consolidated reduceByKeyAndWindow's many functions into smaller number of functions with optional parameters. --- .../main/scala/spark/streaming/DStream.scala | 2 +- .../streaming/PairDStreamFunctions.scala | 71 ++++++++----------- .../streaming/api/java/JavaPairDStream.scala | 28 +++++--- .../dstream/ReducedWindowedDStream.scala | 30 +++++--- .../streaming/util/MasterFailureTest.scala | 1 - streaming/src/test/resources/log4j.properties | 2 +- .../streaming/WindowOperationsSuite.scala | 49 ++++++++----- 7 files changed, 102 insertions(+), 81 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/DStream.scala b/streaming/src/main/scala/spark/streaming/DStream.scala index 0c1b667c0a..6abec9e6be 100644 --- a/streaming/src/main/scala/spark/streaming/DStream.scala +++ b/streaming/src/main/scala/spark/streaming/DStream.scala @@ -531,7 +531,7 @@ abstract class DStream[T: ClassManifest] ( windowDuration: Duration, slideDuration: Duration ): DStream[T] = { - this.window(windowDuration, slideDuration).reduce(reduceFunc) + this.reduce(reduceFunc).window(windowDuration, slideDuration).reduce(reduceFunc) } def reduceByWindow( diff --git a/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala b/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala index fbcf061126..021ff83b36 100644 --- a/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala +++ b/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala @@ -137,7 +137,8 @@ extends Serializable { * @param slideDuration sliding interval of the window (i.e., the interval after which * the new DStream will generate RDDs); must be a multiple of this * DStream's batching interval - * @param numPartitions Number of partitions of each RDD in the new DStream. + * @param numPartitions number of partitions of each RDD in the new DStream; if not specified + * then Spark's default number of partitions will be used */ def groupByKeyAndWindow( windowDuration: Duration, @@ -155,7 +156,7 @@ extends Serializable { * @param slideDuration sliding interval of the window (i.e., the interval after which * the new DStream will generate RDDs); must be a multiple of this * DStream's batching interval - * @param partitioner Partitioner for controlling the partitioning of each RDD in the new DStream. + * @param partitioner partitioner for controlling the partitioning of each RDD in the new DStream. */ def groupByKeyAndWindow( windowDuration: Duration, @@ -213,7 +214,7 @@ extends Serializable { * @param numPartitions Number of partitions of each RDD in the new DStream. 
*/ def reduceByKeyAndWindow( - reduceFunc: (V, V) => V, + reduceFunc: (V, V) => V, windowDuration: Duration, slideDuration: Duration, numPartitions: Int @@ -230,7 +231,8 @@ extends Serializable { * @param slideDuration sliding interval of the window (i.e., the interval after which * the new DStream will generate RDDs); must be a multiple of this * DStream's batching interval - * @param partitioner Partitioner for controlling the partitioning of each RDD in the new DStream. + * @param partitioner partitioner for controlling the partitioning of each RDD + * in the new DStream. */ def reduceByKeyAndWindow( reduceFunc: (V, V) => V, @@ -245,7 +247,7 @@ extends Serializable { } /** - * Create a new DStream by reducing over a using incremental computation. + * Create a new DStream by applying incremental `reduceByKey` over a sliding window. * The reduced value of over a new window is calculated using the old window's reduce value : * 1. reduce the new values that entered the window (e.g., adding new counts) * 2. "inverse reduce" the old values that left the window (e.g., subtracting old counts) @@ -253,81 +255,64 @@ extends Serializable { * However, it is applicable to only "invertible reduce functions". * Hash partitioning is used to generate the RDDs with Spark's default number of partitions. * @param reduceFunc associative reduce function - * @param invReduceFunc inverse function + * @param invReduceFunc inverse reduce function * @param windowDuration width of the window; must be a multiple of this DStream's * batching interval * @param slideDuration sliding interval of the window (i.e., the interval after which * the new DStream will generate RDDs); must be a multiple of this * DStream's batching interval + * @param filterFunc Optional function to filter expired key-value pairs; + * only pairs that satisfy the function are retained */ def reduceByKeyAndWindow( reduceFunc: (V, V) => V, invReduceFunc: (V, V) => V, windowDuration: Duration, - slideDuration: Duration + slideDuration: Duration = self.slideDuration, + numPartitions: Int = ssc.sc.defaultParallelism, + filterFunc: ((K, V)) => Boolean = null ): DStream[(K, V)] = { reduceByKeyAndWindow( - reduceFunc, invReduceFunc, windowDuration, slideDuration, defaultPartitioner()) + reduceFunc, invReduceFunc, windowDuration, + slideDuration, defaultPartitioner(numPartitions), filterFunc + ) } /** - * Create a new DStream by reducing over a using incremental computation. + * Create a new DStream by applying incremental `reduceByKey` over a sliding window. * The reduced value of over a new window is calculated using the old window's reduce value : * 1. reduce the new values that entered the window (e.g., adding new counts) * 2. "inverse reduce" the old values that left the window (e.g., subtracting old counts) * This is more efficient that reduceByKeyAndWindow without "inverse reduce" function. * However, it is applicable to only "invertible reduce functions". - * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. 
- * @param reduceFunc associative reduce function - * @param invReduceFunc inverse function + * @param reduceFunc associative reduce function + * @param invReduceFunc inverse reduce function * @param windowDuration width of the window; must be a multiple of this DStream's * batching interval * @param slideDuration sliding interval of the window (i.e., the interval after which * the new DStream will generate RDDs); must be a multiple of this * DStream's batching interval - * @param numPartitions Number of partitions of each RDD in the new DStream. + * @param partitioner partitioner for controlling the partitioning of each RDD in the new DStream. + * @param filterFunc Optional function to filter expired key-value pairs; + * only pairs that satisfy the function are retained */ def reduceByKeyAndWindow( reduceFunc: (V, V) => V, invReduceFunc: (V, V) => V, windowDuration: Duration, slideDuration: Duration, - numPartitions: Int - ): DStream[(K, V)] = { - - reduceByKeyAndWindow( - reduceFunc, invReduceFunc, windowDuration, slideDuration, defaultPartitioner(numPartitions)) - } - - /** - * Create a new DStream by reducing over a using incremental computation. - * The reduced value of over a new window is calculated using the old window's reduce value : - * 1. reduce the new values that entered the window (e.g., adding new counts) - * 2. "inverse reduce" the old values that left the window (e.g., subtracting old counts) - * This is more efficient that reduceByKeyAndWindow without "inverse reduce" function. - * However, it is applicable to only "invertible reduce functions". - * @param reduceFunc associative reduce function - * @param invReduceFunc inverse function - * @param windowDuration width of the window; must be a multiple of this DStream's - * batching interval - * @param slideDuration sliding interval of the window (i.e., the interval after which - * the new DStream will generate RDDs); must be a multiple of this - * DStream's batching interval - * @param partitioner Partitioner for controlling the partitioning of each RDD in the new DStream. - */ - def reduceByKeyAndWindow( - reduceFunc: (V, V) => V, - invReduceFunc: (V, V) => V, - windowDuration: Duration, - slideDuration: Duration, - partitioner: Partitioner + partitioner: Partitioner, + filterFunc: ((K, V)) => Boolean ): DStream[(K, V)] = { val cleanedReduceFunc = ssc.sc.clean(reduceFunc) val cleanedInvReduceFunc = ssc.sc.clean(invReduceFunc) + val cleanedFilterFunc = if (filterFunc != null) Some(ssc.sc.clean(filterFunc)) else None new ReducedWindowedDStream[K, V]( - self, cleanedReduceFunc, cleanedInvReduceFunc, windowDuration, slideDuration, partitioner) + self, cleanedReduceFunc, cleanedInvReduceFunc, cleanedFilterFunc, + windowDuration, slideDuration, partitioner + ) } /** diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala index ef10c091ca..4d3e0d0304 100644 --- a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala @@ -328,7 +328,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( } /** - * Create a new DStream by reducing over a using incremental computation. + * Create a new DStream by applying incremental `reduceByKey` over a sliding window. * The reduced value of over a new window is calculated using the old window's reduce value : * 1. reduce the new values that entered the window (e.g., adding new counts) * 2. 
"inverse reduce" the old values that left the window (e.g., subtracting old counts) @@ -342,25 +342,31 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * @param slideDuration sliding interval of the window (i.e., the interval after which * the new DStream will generate RDDs); must be a multiple of this * DStream's batching interval - * @param numPartitions Number of partitions of each RDD in the new DStream. + * @param numPartitions number of partitions of each RDD in the new DStream. + * @param filterFunc function to filter expired key-value pairs; + * only pairs that satisfy the function are retained + * set this to null if you do not want to filter */ def reduceByKeyAndWindow( reduceFunc: Function2[V, V, V], invReduceFunc: Function2[V, V, V], windowDuration: Duration, slideDuration: Duration, - numPartitions: Int + numPartitions: Int, + filterFunc: JFunction[(K, V), java.lang.Boolean] ): JavaPairDStream[K, V] = { dstream.reduceByKeyAndWindow( reduceFunc, invReduceFunc, windowDuration, slideDuration, - numPartitions) + numPartitions, + (p: (K, V)) => filterFunc(p).booleanValue() + ) } /** - * Create a new DStream by reducing over a using incremental computation. + * Create a new DStream by applying incremental `reduceByKey` over a sliding window. * The reduced value of over a new window is calculated using the old window's reduce value : * 1. reduce the new values that entered the window (e.g., adding new counts) * 2. "inverse reduce" the old values that left the window (e.g., subtracting old counts) @@ -374,20 +380,26 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * the new DStream will generate RDDs); must be a multiple of this * DStream's batching interval * @param partitioner Partitioner for controlling the partitioning of each RDD in the new DStream. 
+ * @param filterFunc function to filter expired key-value pairs; + * only pairs that satisfy the function are retained + * set this to null if you do not want to filter */ def reduceByKeyAndWindow( reduceFunc: Function2[V, V, V], invReduceFunc: Function2[V, V, V], windowDuration: Duration, slideDuration: Duration, - partitioner: Partitioner - ): JavaPairDStream[K, V] = { + partitioner: Partitioner, + filterFunc: JFunction[(K, V), java.lang.Boolean] + ): JavaPairDStream[K, V] = { dstream.reduceByKeyAndWindow( reduceFunc, invReduceFunc, windowDuration, slideDuration, - partitioner) + partitioner, + (p: (K, V)) => filterFunc(p).booleanValue() + ) } /** diff --git a/streaming/src/main/scala/spark/streaming/dstream/ReducedWindowedDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/ReducedWindowedDStream.scala index 733d5c4a25..aa5a71e1ed 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/ReducedWindowedDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/ReducedWindowedDStream.scala @@ -3,7 +3,7 @@ package spark.streaming.dstream import spark.streaming.StreamingContext._ import spark.RDD -import spark.rdd.CoGroupedRDD +import spark.rdd.{CoGroupedRDD, MapPartitionsRDD} import spark.Partitioner import spark.SparkContext._ import spark.storage.StorageLevel @@ -15,7 +15,8 @@ private[streaming] class ReducedWindowedDStream[K: ClassManifest, V: ClassManifest]( parent: DStream[(K, V)], reduceFunc: (V, V) => V, - invReduceFunc: (V, V) => V, + invReduceFunc: (V, V) => V, + filterFunc: Option[((K, V)) => Boolean], _windowDuration: Duration, _slideDuration: Duration, partitioner: Partitioner @@ -87,22 +88,25 @@ class ReducedWindowedDStream[K: ClassManifest, V: ClassManifest]( // // Get the RDDs of the reduced values in "old time steps" - val oldRDDs = reducedStream.slice(previousWindow.beginTime, currentWindow.beginTime - parent.slideDuration) + val oldRDDs = + reducedStream.slice(previousWindow.beginTime, currentWindow.beginTime - parent.slideDuration) logDebug("# old RDDs = " + oldRDDs.size) // Get the RDDs of the reduced values in "new time steps" - val newRDDs = reducedStream.slice(previousWindow.endTime + parent.slideDuration, currentWindow.endTime) + val newRDDs = + reducedStream.slice(previousWindow.endTime + parent.slideDuration, currentWindow.endTime) logDebug("# new RDDs = " + newRDDs.size) // Get the RDD of the reduced value of the previous window - val previousWindowRDD = getOrCompute(previousWindow.endTime).getOrElse(ssc.sc.makeRDD(Seq[(K,V)]())) + val previousWindowRDD = + getOrCompute(previousWindow.endTime).getOrElse(ssc.sc.makeRDD(Seq[(K,V)]())) // Make the list of RDDs that needs to cogrouped together for reducing their reduced values val allRDDs = new ArrayBuffer[RDD[(K, V)]]() += previousWindowRDD ++= oldRDDs ++= newRDDs // Cogroup the reduced RDDs and merge the reduced values - val cogroupedRDD = new CoGroupedRDD[K](allRDDs.toSeq.asInstanceOf[Seq[RDD[(_, _)]]], partitioner) - //val mergeValuesFunc = mergeValues(oldRDDs.size, newRDDs.size) _ + val cogroupedRDD = + new CoGroupedRDD[K](allRDDs.toSeq.asInstanceOf[Seq[RDD[(_, _)]]], partitioner) val numOldValues = oldRDDs.size val numNewValues = newRDDs.size @@ -114,7 +118,9 @@ class ReducedWindowedDStream[K: ClassManifest, V: ClassManifest]( // Getting reduced values "old time steps" that will be removed from current window val oldValues = (1 to numOldValues).map(i => seqOfValues(i)).filter(!_.isEmpty).map(_.head) // Getting reduced values "new time steps" - val newValues = (1 to 
numNewValues).map(i => seqOfValues(numOldValues + i)).filter(!_.isEmpty).map(_.head) + val newValues = + (1 to numNewValues).map(i => seqOfValues(numOldValues + i)).filter(!_.isEmpty).map(_.head) + if (seqOfValues(0).isEmpty) { // If previous window's reduce value does not exist, then at least new values should exist if (newValues.isEmpty) { @@ -140,10 +146,12 @@ class ReducedWindowedDStream[K: ClassManifest, V: ClassManifest]( val mergedValuesRDD = cogroupedRDD.asInstanceOf[RDD[(K,Seq[Seq[V]])]].mapValues(mergeValues) - Some(mergedValuesRDD) + if (filterFunc.isDefined) { + Some(mergedValuesRDD.filter(filterFunc.get)) + } else { + Some(mergedValuesRDD) + } } - - } diff --git a/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala b/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala index 3ffe4b64d0..83d8591a3a 100644 --- a/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala +++ b/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala @@ -291,7 +291,6 @@ class TestOutputStream[T: ClassManifest]( (rdd: RDD[T], t: Time) => { val collected = rdd.collect() output += collected - println(t + ": " + collected.mkString("[", ",", "]")) } ) { diff --git a/streaming/src/test/resources/log4j.properties b/streaming/src/test/resources/log4j.properties index 5652596e1e..f0638e0e02 100644 --- a/streaming/src/test/resources/log4j.properties +++ b/streaming/src/test/resources/log4j.properties @@ -1,7 +1,7 @@ # Set everything to be logged to the file streaming/target/unit-tests.log log4j.rootCategory=WARN, file # log4j.appender.file=org.apache.log4j.FileAppender -log4j.appender.file=org.apache.log4j.ConsoleAppender +log4j.appender.file=org.apache.log4j.FileAppender log4j.appender.file.append=false log4j.appender.file.file=streaming/target/unit-tests.log log4j.appender.file.layout=org.apache.log4j.PatternLayout diff --git a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala b/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala index 1080790147..e6ac7b35aa 100644 --- a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala @@ -84,12 +84,9 @@ class WindowOperationsSuite extends TestSuiteBase { ) /* - The output of the reduceByKeyAndWindow with inverse reduce function is - different from the naive reduceByKeyAndWindow. Even if the count of a - particular key is 0, the key does not get eliminated from the RDDs of - ReducedWindowedDStream. This causes the number of keys in these RDDs to - increase forever. A more generalized version that allows elimination of - keys should be considered. + The output of the reduceByKeyAndWindow with inverse function but without a filter + function will be different from the naive reduceByKeyAndWindow, as no keys get + eliminated from the ReducedWindowedDStream even if the value of a key becomes 0. 
*/ val bigReduceInvOutput = Seq( @@ -177,31 +174,31 @@ class WindowOperationsSuite extends TestSuiteBase { // Testing reduceByKeyAndWindow (with invertible reduce function) - testReduceByKeyAndWindowInv( + testReduceByKeyAndWindowWithInverse( "basic reduction", Seq(Seq(("a", 1), ("a", 3)) ), Seq(Seq(("a", 4)) ) ) - testReduceByKeyAndWindowInv( + testReduceByKeyAndWindowWithInverse( "key already in window and new value added into window", Seq( Seq(("a", 1)), Seq(("a", 1)) ), Seq( Seq(("a", 1)), Seq(("a", 2)) ) ) - testReduceByKeyAndWindowInv( + testReduceByKeyAndWindowWithInverse( "new key added into window", Seq( Seq(("a", 1)), Seq(("a", 1), ("b", 1)) ), Seq( Seq(("a", 1)), Seq(("a", 2), ("b", 1)) ) ) - testReduceByKeyAndWindowInv( + testReduceByKeyAndWindowWithInverse( "key removed from window", Seq( Seq(("a", 1)), Seq(("a", 1)), Seq(), Seq() ), Seq( Seq(("a", 1)), Seq(("a", 2)), Seq(("a", 1)), Seq(("a", 0)) ) ) - testReduceByKeyAndWindowInv( + testReduceByKeyAndWindowWithInverse( "larger slide time", largerSlideInput, largerSlideReduceOutput, @@ -209,7 +206,9 @@ class WindowOperationsSuite extends TestSuiteBase { Seconds(2) ) - testReduceByKeyAndWindowInv("big test", bigInput, bigReduceInvOutput) + testReduceByKeyAndWindowWithInverse("big test", bigInput, bigReduceInvOutput) + + testReduceByKeyAndWindowWithFilteredInverse("big test", bigInput, bigReduceOutput) test("groupByKeyAndWindow") { val input = bigInput @@ -276,27 +275,45 @@ class WindowOperationsSuite extends TestSuiteBase { test("reduceByKeyAndWindow - " + name) { val numBatches = expectedOutput.size * (slideDuration / batchDuration).toInt val operation = (s: DStream[(String, Int)]) => { - s.reduceByKeyAndWindow(_ + _, windowDuration, slideDuration).persist() + s.reduceByKeyAndWindow((x: Int, y: Int) => x + y, windowDuration, slideDuration) } testOperation(input, operation, expectedOutput, numBatches, true) } } - def testReduceByKeyAndWindowInv( + def testReduceByKeyAndWindowWithInverse( name: String, input: Seq[Seq[(String, Int)]], expectedOutput: Seq[Seq[(String, Int)]], windowDuration: Duration = Seconds(2), slideDuration: Duration = Seconds(1) ) { - test("reduceByKeyAndWindowInv - " + name) { + test("ReduceByKeyAndWindow with inverse function - " + name) { val numBatches = expectedOutput.size * (slideDuration / batchDuration).toInt val operation = (s: DStream[(String, Int)]) => { s.reduceByKeyAndWindow(_ + _, _ - _, windowDuration, slideDuration) - .persist() .checkpoint(Seconds(100)) // Large value to avoid effect of RDD checkpointing } testOperation(input, operation, expectedOutput, numBatches, true) } } + + def testReduceByKeyAndWindowWithFilteredInverse( + name: String, + input: Seq[Seq[(String, Int)]], + expectedOutput: Seq[Seq[(String, Int)]], + windowDuration: Duration = Seconds(2), + slideDuration: Duration = Seconds(1) + ) { + test("reduceByKeyAndWindow with inverse and filter functions - " + name) { + val numBatches = expectedOutput.size * (slideDuration / batchDuration).toInt + val filterFunc = (p: (String, Int)) => p._2 != 0 + val operation = (s: DStream[(String, Int)]) => { + s.reduceByKeyAndWindow(_ + _, _ - _, windowDuration, slideDuration, filterFunc = filterFunc) + .persist() + .checkpoint(Seconds(100)) // Large value to avoid effect of RDD checkpointing + } + testOperation(input, operation, expectedOutput, numBatches, true) + } + } } From 03e8dc6861936a0862fba1ca9f830d5ff507718f Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Wed, 13 Feb 2013 20:59:29 -0800 Subject: [PATCH 285/291] Changes functions 
comments to make them more consistent. --- .../streaming/PairDStreamFunctions.scala | 42 ++++++++-------- .../streaming/api/java/JavaPairDStream.scala | 48 +++++++++---------- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala b/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala index 021ff83b36..835b20ae08 100644 --- a/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala +++ b/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala @@ -26,7 +26,7 @@ extends Serializable { } /** - * Create a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to + * Return a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to * generate the RDDs with Spark's default number of partitions. */ def groupByKey(): DStream[(K, Seq[V])] = { @@ -34,7 +34,7 @@ extends Serializable { } /** - * Create a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to + * Return a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to * generate the RDDs with `numPartitions` partitions. */ def groupByKey(numPartitions: Int): DStream[(K, Seq[V])] = { @@ -42,7 +42,7 @@ extends Serializable { } /** - * Create a new DStream by applying `groupByKey` on each RDD. The supplied [[spark.Partitioner]] + * Return a new DStream by applying `groupByKey` on each RDD. The supplied [[spark.Partitioner]] * is used to control the partitioning of each RDD. */ def groupByKey(partitioner: Partitioner): DStream[(K, Seq[V])] = { @@ -54,7 +54,7 @@ extends Serializable { } /** - * Create a new DStream by applying `reduceByKey` to each RDD. The values for each key are + * Return a new DStream by applying `reduceByKey` to each RDD. The values for each key are * merged using the associative reduce function. Hash partitioning is used to generate the RDDs * with Spark's default number of partitions. */ @@ -63,7 +63,7 @@ extends Serializable { } /** - * Create a new DStream by applying `reduceByKey` to each RDD. The values for each key are + * Return a new DStream by applying `reduceByKey` to each RDD. The values for each key are * merged using the supplied reduce function. Hash partitioning is used to generate the RDDs * with `numPartitions` partitions. */ @@ -72,7 +72,7 @@ extends Serializable { } /** - * Create a new DStream by applying `reduceByKey` to each RDD. The values for each key are + * Return a new DStream by applying `reduceByKey` to each RDD. The values for each key are * merged using the supplied reduce function. [[spark.Partitioner]] is used to control the * partitioning of each RDD. */ @@ -82,7 +82,7 @@ extends Serializable { } /** - * Combine elements of each key in DStream's RDDs using custom function. This is similar to the + * Combine elements of each key in DStream's RDDs using custom functions. This is similar to the * combineByKey for RDDs. Please refer to combineByKey in [[spark.PairRDDFunctions]] for more * information. */ @@ -95,7 +95,7 @@ extends Serializable { } /** - * Create a new DStream by counting the number of values of each key in each RDD. Hash + * Return a new DStream by counting the number of values of each key in each RDD. Hash * partitioning is used to generate the RDDs with Spark's `numPartitions` partitions. 
*/ def countByKey(numPartitions: Int = self.ssc.sc.defaultParallelism): DStream[(K, Long)] = { @@ -103,7 +103,7 @@ extends Serializable { } /** - * Creates a new DStream by applying `groupByKey` over a sliding window. This is similar to + * Return a new DStream by applying `groupByKey` over a sliding window. This is similar to * `DStream.groupByKey()` but applies it over a sliding window. The new DStream generates RDDs * with the same interval as this DStream. Hash partitioning is used to generate the RDDs with * Spark's default number of partitions. @@ -115,7 +115,7 @@ extends Serializable { } /** - * Create a new DStream by applying `groupByKey` over a sliding window. Similar to + * Return a new DStream by applying `groupByKey` over a sliding window. Similar to * `DStream.groupByKey()`, but applies it over a sliding window. Hash partitioning is used to * generate the RDDs with Spark's default number of partitions. * @param windowDuration width of the window; must be a multiple of this DStream's @@ -129,7 +129,7 @@ extends Serializable { } /** - * Create a new DStream by applying `groupByKey` over a sliding window on `this` DStream. + * Return a new DStream by applying `groupByKey` over a sliding window on `this` DStream. * Similar to `DStream.groupByKey()`, but applies it over a sliding window. * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. * @param windowDuration width of the window; must be a multiple of this DStream's @@ -167,7 +167,7 @@ extends Serializable { } /** - * Create a new DStream by applying `reduceByKey` over a sliding window on `this` DStream. + * Return a new DStream by applying `reduceByKey` over a sliding window on `this` DStream. * Similar to `DStream.reduceByKey()`, but applies it over a sliding window. The new DStream * generates RDDs with the same interval as this DStream. Hash partitioning is used to generate * the RDDs with Spark's default number of partitions. @@ -183,7 +183,7 @@ extends Serializable { } /** - * Create a new DStream by applying `reduceByKey` over a sliding window. This is similar to + * Return a new DStream by applying `reduceByKey` over a sliding window. This is similar to * `DStream.reduceByKey()` but applies it over a sliding window. Hash partitioning is used to * generate the RDDs with Spark's default number of partitions. * @param reduceFunc associative reduce function @@ -202,7 +202,7 @@ extends Serializable { } /** - * Create a new DStream by applying `reduceByKey` over a sliding window. This is similar to + * Return a new DStream by applying `reduceByKey` over a sliding window. This is similar to * `DStream.reduceByKey()` but applies it over a sliding window. Hash partitioning is used to * generate the RDDs with `numPartitions` partitions. * @param reduceFunc associative reduce function @@ -223,7 +223,7 @@ extends Serializable { } /** - * Create a new DStream by applying `reduceByKey` over a sliding window. Similar to + * Return a new DStream by applying `reduceByKey` over a sliding window. Similar to * `DStream.reduceByKey()`, but applies it over a sliding window. * @param reduceFunc associative reduce function * @param windowDuration width of the window; must be a multiple of this DStream's @@ -247,7 +247,7 @@ extends Serializable { } /** - * Create a new DStream by applying incremental `reduceByKey` over a sliding window. + * Return a new DStream by applying incremental `reduceByKey` over a sliding window. 
* The reduced value of over a new window is calculated using the old window's reduce value : * 1. reduce the new values that entered the window (e.g., adding new counts) * 2. "inverse reduce" the old values that left the window (e.g., subtracting old counts) @@ -280,7 +280,7 @@ extends Serializable { } /** - * Create a new DStream by applying incremental `reduceByKey` over a sliding window. + * Return a new DStream by applying incremental `reduceByKey` over a sliding window. * The reduced value of over a new window is calculated using the old window's reduce value : * 1. reduce the new values that entered the window (e.g., adding new counts) * 2. "inverse reduce" the old values that left the window (e.g., subtracting old counts) @@ -316,7 +316,7 @@ extends Serializable { } /** - * Create a new DStream by counting the number of values for each key over a window. + * Return a new DStream by counting the number of values for each key over a window. * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. * @param windowDuration width of the window; must be a multiple of this DStream's * batching interval @@ -341,7 +341,7 @@ extends Serializable { } /** - * Create a new "state" DStream where the state for each key is updated by applying + * Return a new "state" DStream where the state for each key is updated by applying * the given function on the previous state of the key and the new values of each key. * Hash partitioning is used to generate the RDDs with Spark's default number of partitions. * @param updateFunc State update function. If `this` function returns None, then @@ -355,7 +355,7 @@ extends Serializable { } /** - * Create a new "state" DStream where the state for each key is updated by applying + * Return a new "state" DStream where the state for each key is updated by applying * the given function on the previous state of the key and the new values of each key. * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. * @param updateFunc State update function. If `this` function returns None, then @@ -390,7 +390,7 @@ extends Serializable { } /** - * Create a new "state" DStream where the state for each key is updated by applying + * Return a new "state" DStream where the state for each key is updated by applying * the given function on the previous state of the key and the new values of each key. * [[spark.Paxrtitioner]] is used to control the partitioning of each RDD. * @param updateFunc State update function. If `this` function returns None, then diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala index 4d3e0d0304..048e10b69c 100644 --- a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala @@ -25,17 +25,17 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( // Methods common to all DStream's // ======================================================================= - /** Returns a new DStream containing only the elements that satisfy a predicate. */ + /** Return a new DStream containing only the elements that satisfy a predicate. 
*/ def filter(f: JFunction[(K, V), java.lang.Boolean]): JavaPairDStream[K, V] = dstream.filter((x => f(x).booleanValue())) - /** Persists RDDs of this DStream with the default storage level (MEMORY_ONLY_SER) */ + /** Persist RDDs of this DStream with the default storage level (MEMORY_ONLY_SER) */ def cache(): JavaPairDStream[K, V] = dstream.cache() - /** Persists RDDs of this DStream with the default storage level (MEMORY_ONLY_SER) */ + /** Persist RDDs of this DStream with the default storage level (MEMORY_ONLY_SER) */ def persist(): JavaPairDStream[K, V] = dstream.cache() - /** Persists the RDDs of this DStream with the given storage level */ + /** Persist the RDDs of this DStream with the given storage level */ def persist(storageLevel: StorageLevel): JavaPairDStream[K, V] = dstream.persist(storageLevel) /** Method that generates a RDD for the given Duration */ @@ -67,7 +67,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( dstream.window(windowDuration, slideDuration) /** - * Returns a new DStream which computed based on tumbling window on this DStream. + * Return a new DStream which computed based on tumbling window on this DStream. * This is equivalent to window(batchDuration, batchDuration). * @param batchDuration tumbling window duration; must be a multiple of this DStream's interval */ @@ -75,7 +75,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( dstream.tumble(batchDuration) /** - * Returns a new DStream by unifying data of another DStream with this DStream. + * Return a new DStream by unifying data of another DStream with this DStream. * @param that Another DStream having the same interval (i.e., slideDuration) as this DStream. */ def union(that: JavaPairDStream[K, V]): JavaPairDStream[K, V] = @@ -86,21 +86,21 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( // ======================================================================= /** - * Create a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to + * Return a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to * generate the RDDs with Spark's default number of partitions. */ def groupByKey(): JavaPairDStream[K, JList[V]] = dstream.groupByKey().mapValues(seqAsJavaList _) /** - * Create a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to + * Return a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to * generate the RDDs with `numPartitions` partitions. */ def groupByKey(numPartitions: Int): JavaPairDStream[K, JList[V]] = dstream.groupByKey(numPartitions).mapValues(seqAsJavaList _) /** - * Creates a new DStream by applying `groupByKey` on each RDD of `this` DStream. + * Return a new DStream by applying `groupByKey` on each RDD of `this` DStream. * Therefore, the values for each key in `this` DStream's RDDs are grouped into a * single sequence to generate the RDDs of the new DStream. [[spark.Partitioner]] * is used to control the partitioning of each RDD. @@ -109,7 +109,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( dstream.groupByKey(partitioner).mapValues(seqAsJavaList _) /** - * Create a new DStream by applying `reduceByKey` to each RDD. The values for each key are + * Return a new DStream by applying `reduceByKey` to each RDD. The values for each key are * merged using the associative reduce function. Hash partitioning is used to generate the RDDs * with Spark's default number of partitions. 
*/ @@ -117,7 +117,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( dstream.reduceByKey(func) /** - * Create a new DStream by applying `reduceByKey` to each RDD. The values for each key are + * Return a new DStream by applying `reduceByKey` to each RDD. The values for each key are * merged using the supplied reduce function. Hash partitioning is used to generate the RDDs * with `numPartitions` partitions. */ @@ -125,7 +125,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( dstream.reduceByKey(func, numPartitions) /** - * Create a new DStream by applying `reduceByKey` to each RDD. The values for each key are + * Return a new DStream by applying `reduceByKey` to each RDD. The values for each key are * merged using the supplied reduce function. [[spark.Partitioner]] is used to control the * partitioning of each RDD. */ @@ -149,7 +149,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( } /** - * Create a new DStream by counting the number of values of each key in each RDD. Hash + * Return a new DStream by counting the number of values of each key in each RDD. Hash * partitioning is used to generate the RDDs with Spark's `numPartitions` partitions. */ def countByKey(numPartitions: Int): JavaPairDStream[K, JLong] = { @@ -158,7 +158,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( /** - * Create a new DStream by counting the number of values of each key in each RDD. Hash + * Return a new DStream by counting the number of values of each key in each RDD. Hash * partitioning is used to generate the RDDs with the default number of partitions. */ def countByKey(): JavaPairDStream[K, JLong] = { @@ -166,7 +166,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( } /** - * Creates a new DStream by applying `groupByKey` over a sliding window. This is similar to + * Return a new DStream by applying `groupByKey` over a sliding window. This is similar to * `DStream.groupByKey()` but applies it over a sliding window. The new DStream generates RDDs * with the same interval as this DStream. Hash partitioning is used to generate the RDDs with * Spark's default number of partitions. @@ -178,7 +178,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( } /** - * Create a new DStream by applying `groupByKey` over a sliding window. Similar to + * Return a new DStream by applying `groupByKey` over a sliding window. Similar to * `DStream.groupByKey()`, but applies it over a sliding window. Hash partitioning is used to * generate the RDDs with Spark's default number of partitions. * @param windowDuration width of the window; must be a multiple of this DStream's @@ -193,7 +193,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( } /** - * Create a new DStream by applying `groupByKey` over a sliding window on `this` DStream. + * Return a new DStream by applying `groupByKey` over a sliding window on `this` DStream. * Similar to `DStream.groupByKey()`, but applies it over a sliding window. * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. * @param windowDuration width of the window; must be a multiple of this DStream's @@ -210,7 +210,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( } /** - * Create a new DStream by applying `groupByKey` over a sliding window on `this` DStream. + * Return a new DStream by applying `groupByKey` over a sliding window on `this` DStream. * Similar to `DStream.groupByKey()`, but applies it over a sliding window. 
* @param windowDuration width of the window; must be a multiple of this DStream's * batching interval @@ -243,7 +243,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( } /** - * Create a new DStream by applying `reduceByKey` over a sliding window. This is similar to + * Return a new DStream by applying `reduceByKey` over a sliding window. This is similar to * `DStream.reduceByKey()` but applies it over a sliding window. Hash partitioning is used to * generate the RDDs with Spark's default number of partitions. * @param reduceFunc associative reduce function @@ -262,7 +262,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( } /** - * Create a new DStream by applying `reduceByKey` over a sliding window. This is similar to + * Return a new DStream by applying `reduceByKey` over a sliding window. This is similar to * `DStream.reduceByKey()` but applies it over a sliding window. Hash partitioning is used to * generate the RDDs with `numPartitions` partitions. * @param reduceFunc associative reduce function @@ -283,7 +283,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( } /** - * Create a new DStream by applying `reduceByKey` over a sliding window. Similar to + * Return a new DStream by applying `reduceByKey` over a sliding window. Similar to * `DStream.reduceByKey()`, but applies it over a sliding window. * @param reduceFunc associative reduce function * @param windowDuration width of the window; must be a multiple of this DStream's @@ -303,7 +303,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( } /** - * Create a new DStream by reducing over a using incremental computation. + * Return a new DStream by reducing over a using incremental computation. * The reduced value of over a new window is calculated using the old window's reduce value : * 1. reduce the new values that entered the window (e.g., adding new counts) * 2. "inverse reduce" the old values that left the window (e.g., subtracting old counts) @@ -328,7 +328,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( } /** - * Create a new DStream by applying incremental `reduceByKey` over a sliding window. + * Return a new DStream by applying incremental `reduceByKey` over a sliding window. * The reduced value of over a new window is calculated using the old window's reduce value : * 1. reduce the new values that entered the window (e.g., adding new counts) * 2. "inverse reduce" the old values that left the window (e.g., subtracting old counts) @@ -366,7 +366,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( } /** - * Create a new DStream by applying incremental `reduceByKey` over a sliding window. + * Return a new DStream by applying incremental `reduceByKey` over a sliding window. * The reduced value of over a new window is calculated using the old window's reduce value : * 1. reduce the new values that entered the window (e.g., adding new counts) * 2. "inverse reduce" the old values that left the window (e.g., subtracting old counts) From 2eacf22401f75b956036fb0c32eb38baa16b224e Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Thu, 14 Feb 2013 12:21:47 -0800 Subject: [PATCH 286/291] Removed countByKeyAndWindow on paired DStreams, and added countByValueAndWindow for all DStreams. Updated both scala and java API and testsuites. 
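For illustration, a minimal sketch of the new operations described in this patch, assuming a StreamingContext ssc, a host and port, and Seconds in scope (countByValueAndWindow maintains its counts incrementally, so a checkpoint directory is typically required):

    val words = ssc.networkTextStream(host, port).flatMap(_.split(" "))
    // Count occurrences of each distinct word in every batch
    // (replaces the old words.map(w => (w, 1)).countByKey() pattern).
    val counts = words.countByValue()
    // Count occurrences of each distinct word over the last 10 seconds, sliding every
    // 2 seconds (replaces the removed countByKeyAndWindow on paired DStreams).
    val windowedCounts = words.countByValueAndWindow(Seconds(10), Seconds(2))
    windowedCounts.print()
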
--- .../examples/clickstream/PageViewStream.scala | 11 ++- .../main/scala/spark/streaming/DStream.scala | 88 +++++++++++++++---- .../streaming/PairDStreamFunctions.scala | 43 ++------- .../streaming/api/java/JavaDStream.scala | 27 +++--- .../streaming/api/java/JavaDStreamLike.scala | 87 +++++++++++++++++- .../streaming/api/java/JavaPairDStream.scala | 56 +----------- .../java/spark/streaming/JavaAPISuite.java | 79 +++++++---------- .../streaming/BasicOperationsSuite.scala | 21 ++++- .../streaming/WindowOperationsSuite.scala | 8 +- 9 files changed, 231 insertions(+), 189 deletions(-) diff --git a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewStream.scala b/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewStream.scala index a191321d91..60f228b8ad 100644 --- a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewStream.scala +++ b/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewStream.scala @@ -28,16 +28,15 @@ object PageViewStream { // Create a NetworkInputDStream on target host:port and convert each line to a PageView val pageViews = ssc.networkTextStream(host, port) - .flatMap(_.split("\n")) - .map(PageView.fromString(_)) + .flatMap(_.split("\n")) + .map(PageView.fromString(_)) // Return a count of views per URL seen in each batch - val pageCounts = pageViews.map(view => ((view.url, 1))).countByKey() + val pageCounts = pageViews.map(view => view.url).countByValue() // Return a sliding window of page views per URL in the last ten seconds - val slidingPageCounts = pageViews.map(view => ((view.url, 1))) - .window(Seconds(10), Seconds(2)) - .countByKey() + val slidingPageCounts = pageViews.map(view => view.url) + .countByValueAndWindow(Seconds(10), Seconds(2)) // Return the rate of error pages (a non 200 status) in each zip code over the last 30 seconds diff --git a/streaming/src/main/scala/spark/streaming/DStream.scala b/streaming/src/main/scala/spark/streaming/DStream.scala index 6abec9e6be..ce42b742d7 100644 --- a/streaming/src/main/scala/spark/streaming/DStream.scala +++ b/streaming/src/main/scala/spark/streaming/DStream.scala @@ -441,6 +441,15 @@ abstract class DStream[T: ClassManifest] ( */ def count(): DStream[Long] = this.map(_ => 1L).reduce(_ + _) + /** + * Return a new DStream in which each RDD contains the counts of each distinct value in + * each RDD of this DStream. Hash partitioning is used to generate + * the RDDs with `numPartitions` partitions (Spark's default number of partitions if + * `numPartitions` not specified). + */ + def countByValue(numPartitions: Int = ssc.sc.defaultParallelism): DStream[(T, Long)] = + this.map(x => (x, 1L)).reduceByKey((x: Long, y: Long) => x + y, numPartitions) + /** * Apply a function to each RDD in this DStream. This is an output operator, so * this DStream will be registered as an output stream and therefore materialized. @@ -494,14 +503,16 @@ abstract class DStream[T: ClassManifest] ( } /** - * Return a new DStream which is computed based on windowed batches of this DStream. - * The new DStream generates RDDs with the same interval as this DStream. + * Return a new DStream in which each RDD contains all the elements in seen in a + * sliding window of time over this DStream. The new DStream generates RDDs with + * the same interval as this DStream. * @param windowDuration width of the window; must be a multiple of this DStream's interval. 
*/ def window(windowDuration: Duration): DStream[T] = window(windowDuration, this.slideDuration) /** - * Return a new DStream which is computed based on windowed batches of this DStream. + * Return a new DStream in which each RDD contains all the elements in seen in a + * sliding window of time over this DStream. * @param windowDuration width of the window; must be a multiple of this DStream's * batching interval * @param slideDuration sliding interval of the window (i.e., the interval after which @@ -512,19 +523,15 @@ abstract class DStream[T: ClassManifest] ( new WindowedDStream(this, windowDuration, slideDuration) } - /** - * Return a new DStream which computed based on tumbling window on this DStream. - * This is equivalent to window(batchTime, batchTime). - * @param batchDuration tumbling window duration; must be a multiple of this DStream's - * batching interval - */ - def tumble(batchDuration: Duration): DStream[T] = window(batchDuration, batchDuration) - /** * Return a new DStream in which each RDD has a single element generated by reducing all - * elements in a window over this DStream. windowDuration and slideDuration are as defined - * in the window() operation. This is equivalent to - * window(windowDuration, slideDuration).reduce(reduceFunc) + * elements in a sliding window over this DStream. + * @param reduceFunc associative reduce function + * @param windowDuration width of the window; must be a multiple of this DStream's + * batching interval + * @param slideDuration sliding interval of the window (i.e., the interval after which + * the new DStream will generate RDDs); must be a multiple of this + * DStream's batching interval */ def reduceByWindow( reduceFunc: (T, T) => T, @@ -534,6 +541,22 @@ abstract class DStream[T: ClassManifest] ( this.reduce(reduceFunc).window(windowDuration, slideDuration).reduce(reduceFunc) } + /** + * Return a new DStream in which each RDD has a single element generated by reducing all + * elements in a sliding window over this DStream. However, the reduction is done incrementally + * using the old window's reduced value : + * 1. reduce the new values that entered the window (e.g., adding new counts) + * 2. "inverse reduce" the old values that left the window (e.g., subtracting old counts) + * This is more efficient than reduceByWindow without "inverse reduce" function. + * However, it is applicable to only "invertible reduce functions". + * @param reduceFunc associative reduce function + * @param invReduceFunc inverse reduce function + * @param windowDuration width of the window; must be a multiple of this DStream's + * batching interval + * @param slideDuration sliding interval of the window (i.e., the interval after which + * the new DStream will generate RDDs); must be a multiple of this + * DStream's batching interval + */ def reduceByWindow( reduceFunc: (T, T) => T, invReduceFunc: (T, T) => T, @@ -547,13 +570,46 @@ abstract class DStream[T: ClassManifest] ( /** * Return a new DStream in which each RDD has a single element generated by counting the number - * of elements in a window over this DStream. windowDuration and slideDuration are as defined in the - * window() operation. This is equivalent to window(windowDuration, slideDuration).count() + * of elements in a sliding window over this DStream. Hash partitioning is used to generate the RDDs with + * Spark's default number of partitions. 
+ * @param windowDuration width of the window; must be a multiple of this DStream's + * batching interval + * @param slideDuration sliding interval of the window (i.e., the interval after which + * the new DStream will generate RDDs); must be a multiple of this + * DStream's batching interval */ def countByWindow(windowDuration: Duration, slideDuration: Duration): DStream[Long] = { this.map(_ => 1L).reduceByWindow(_ + _, _ - _, windowDuration, slideDuration) } + /** + * Return a new DStream in which each RDD contains the count of distinct elements in + * RDDs in a sliding window over this DStream. Hash partitioning is used to generate + * the RDDs with `numPartitions` partitions (Spark's default number of partitions if + * `numPartitions` not specified). + * @param windowDuration width of the window; must be a multiple of this DStream's + * batching interval + * @param slideDuration sliding interval of the window (i.e., the interval after which + * the new DStream will generate RDDs); must be a multiple of this + * DStream's batching interval + * @param numPartitions number of partitions of each RDD in the new DStream. + */ + def countByValueAndWindow( + windowDuration: Duration, + slideDuration: Duration, + numPartitions: Int = ssc.sc.defaultParallelism + ): DStream[(T, Long)] = { + + this.map(x => (x, 1L)).reduceByKeyAndWindow( + (x: Long, y: Long) => x + y, + (x: Long, y: Long) => x - y, + windowDuration, + slideDuration, + numPartitions, + (x: (T, Long)) => x._2 != 0L + ) + } + /** * Return a new DStream by unifying data of another DStream with this DStream. * @param that Another DStream having the same slideDuration as this DStream. diff --git a/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala b/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala index 835b20ae08..5127db3bbc 100644 --- a/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala +++ b/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala @@ -94,14 +94,6 @@ extends Serializable { new ShuffledDStream[K, V, C](self, createCombiner, mergeValue, mergeCombiner, partitioner) } - /** - * Return a new DStream by counting the number of values of each key in each RDD. Hash - * partitioning is used to generate the RDDs with Spark's `numPartitions` partitions. - */ - def countByKey(numPartitions: Int = self.ssc.sc.defaultParallelism): DStream[(K, Long)] = { - self.map(x => (x._1, 1L)).reduceByKey((x: Long, y: Long) => x + y, numPartitions) - } - /** * Return a new DStream by applying `groupByKey` over a sliding window. This is similar to * `DStream.groupByKey()` but applies it over a sliding window. The new DStream generates RDDs @@ -211,7 +203,7 @@ extends Serializable { * @param slideDuration sliding interval of the window (i.e., the interval after which * the new DStream will generate RDDs); must be a multiple of this * DStream's batching interval - * @param numPartitions Number of partitions of each RDD in the new DStream. + * @param numPartitions number of partitions of each RDD in the new DStream. */ def reduceByKeyAndWindow( reduceFunc: (V, V) => V, @@ -248,10 +240,10 @@ extends Serializable { /** * Return a new DStream by applying incremental `reduceByKey` over a sliding window. - * The reduced value of over a new window is calculated using the old window's reduce value : + * The reduced value of over a new window is calculated using the old window's reduced value : * 1. reduce the new values that entered the window (e.g., adding new counts) * 2. 
"inverse reduce" the old values that left the window (e.g., subtracting old counts) - * This is more efficient that reduceByKeyAndWindow without "inverse reduce" function. + * This is more efficient than reduceByKeyAndWindow without "inverse reduce" function. * However, it is applicable to only "invertible reduce functions". * Hash partitioning is used to generate the RDDs with Spark's default number of partitions. * @param reduceFunc associative reduce function @@ -281,10 +273,10 @@ extends Serializable { /** * Return a new DStream by applying incremental `reduceByKey` over a sliding window. - * The reduced value of over a new window is calculated using the old window's reduce value : + * The reduced value of over a new window is calculated using the old window's reduced value : * 1. reduce the new values that entered the window (e.g., adding new counts) * 2. "inverse reduce" the old values that left the window (e.g., subtracting old counts) - * This is more efficient that reduceByKeyAndWindow without "inverse reduce" function. + * This is more efficient than reduceByKeyAndWindow without "inverse reduce" function. * However, it is applicable to only "invertible reduce functions". * @param reduceFunc associative reduce function * @param invReduceFunc inverse reduce function @@ -315,31 +307,6 @@ extends Serializable { ) } - /** - * Return a new DStream by counting the number of values for each key over a window. - * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. - * @param windowDuration width of the window; must be a multiple of this DStream's - * batching interval - * @param slideDuration sliding interval of the window (i.e., the interval after which - * the new DStream will generate RDDs); must be a multiple of this - * DStream's batching interval - * @param numPartitions Number of partitions of each RDD in the new DStream. - */ - def countByKeyAndWindow( - windowDuration: Duration, - slideDuration: Duration, - numPartitions: Int = self.ssc.sc.defaultParallelism - ): DStream[(K, Long)] = { - - self.map(x => (x._1, 1L)).reduceByKeyAndWindow( - (x: Long, y: Long) => x + y, - (x: Long, y: Long) => x - y, - windowDuration, - slideDuration, - numPartitions - ) - } - /** * Return a new "state" DStream where the state for each key is updated by applying * the given function on the previous state of the key and the new values of each key. diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala index 2e7466b16c..30985b4ebc 100644 --- a/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala +++ b/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala @@ -36,7 +36,7 @@ class JavaDStream[T](val dstream: DStream[T])(implicit val classManifest: ClassM def cache(): JavaDStream[T] = dstream.cache() /** Persist RDDs of this DStream with the default storage level (MEMORY_ONLY_SER) */ - def persist(): JavaDStream[T] = dstream.cache() + def persist(): JavaDStream[T] = dstream.persist() /** Persist the RDDs of this DStream with the given storage level */ def persist(storageLevel: StorageLevel): JavaDStream[T] = dstream.persist(storageLevel) @@ -50,33 +50,26 @@ class JavaDStream[T](val dstream: DStream[T])(implicit val classManifest: ClassM } /** - * Return a new DStream which is computed based on windowed batches of this DStream. - * The new DStream generates RDDs with the same interval as this DStream. 
+ * Return a new DStream in which each RDD contains all the elements in seen in a + * sliding window of time over this DStream. The new DStream generates RDDs with + * the same interval as this DStream. * @param windowDuration width of the window; must be a multiple of this DStream's interval. - * @return */ def window(windowDuration: Duration): JavaDStream[T] = dstream.window(windowDuration) /** - * Return a new DStream which is computed based on windowed batches of this DStream. - * @param windowDuration duration (i.e., width) of the window; - * must be a multiple of this DStream's interval + * Return a new DStream in which each RDD contains all the elements in seen in a + * sliding window of time over this DStream. + * @param windowDuration width of the window; must be a multiple of this DStream's + * batching interval * @param slideDuration sliding interval of the window (i.e., the interval after which - * the new DStream will generate RDDs); must be a multiple of this - * DStream's interval + * the new DStream will generate RDDs); must be a multiple of this + * DStream's batching interval */ def window(windowDuration: Duration, slideDuration: Duration): JavaDStream[T] = dstream.window(windowDuration, slideDuration) - /** - * Return a new DStream which computed based on tumbling window on this DStream. - * This is equivalent to window(batchDuration, batchDuration). - * @param batchDuration tumbling window duration; must be a multiple of this DStream's interval - */ - def tumble(batchDuration: Duration): JavaDStream[T] = - dstream.tumble(batchDuration) - /** * Return a new DStream by unifying data of another DStream with this DStream. * @param that Another DStream having the same interval (i.e., slideDuration) as this DStream. diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaDStreamLike.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaDStreamLike.scala index b93cb7865a..1c1ba05ff9 100644 --- a/streaming/src/main/scala/spark/streaming/api/java/JavaDStreamLike.scala +++ b/streaming/src/main/scala/spark/streaming/api/java/JavaDStreamLike.scala @@ -33,6 +33,26 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This]] extends Serializable */ def count(): JavaDStream[JLong] = dstream.count() + /** + * Return a new DStream in which each RDD contains the counts of each distinct value in + * each RDD of this DStream. Hash partitioning is used to generate the RDDs with + * Spark's default number of partitions. + */ + def countByValue(): JavaPairDStream[T, JLong] = { + JavaPairDStream.scalaToJavaLong(dstream.countByValue()) + } + + /** + * Return a new DStream in which each RDD contains the counts of each distinct value in + * each RDD of this DStream. Hash partitioning is used to generate the RDDs with `numPartitions` + * partitions. + * @param numPartitions number of partitions of each RDD in the new DStream. + */ + def countByValue(numPartitions: Int): JavaPairDStream[T, JLong] = { + JavaPairDStream.scalaToJavaLong(dstream.countByValue(numPartitions)) + } + + /** * Return a new DStream in which each RDD has a single element generated by counting the number * of elements in a window over this DStream. windowDuration and slideDuration are as defined in the @@ -42,6 +62,39 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This]] extends Serializable dstream.countByWindow(windowDuration, slideDuration) } + /** + * Return a new DStream in which each RDD contains the count of distinct elements in + * RDDs in a sliding window over this DStream. 
Hash partitioning is used to generate the RDDs with + * Spark's default number of partitions. + * @param windowDuration width of the window; must be a multiple of this DStream's + * batching interval + * @param slideDuration sliding interval of the window (i.e., the interval after which + * the new DStream will generate RDDs); must be a multiple of this + * DStream's batching interval + */ + def countByValueAndWindow(windowDuration: Duration, slideDuration: Duration) + : JavaPairDStream[T, JLong] = { + JavaPairDStream.scalaToJavaLong( + dstream.countByValueAndWindow(windowDuration, slideDuration)) + } + + /** + * Return a new DStream in which each RDD contains the count of distinct elements in + * RDDs in a sliding window over this DStream. Hash partitioning is used to generate the RDDs with `numPartitions` + * partitions. + * @param windowDuration width of the window; must be a multiple of this DStream's + * batching interval + * @param slideDuration sliding interval of the window (i.e., the interval after which + * the new DStream will generate RDDs); must be a multiple of this + * DStream's batching interval + * @param numPartitions number of partitions of each RDD in the new DStream. + */ + def countByValueAndWindow(windowDuration: Duration, slideDuration: Duration, numPartitions: Int) + : JavaPairDStream[T, JLong] = { + JavaPairDStream.scalaToJavaLong( + dstream.countByValueAndWindow(windowDuration, slideDuration, numPartitions)) + } + /** * Return a new DStream in which each RDD is generated by applying glom() to each RDD of * this DStream. Applying glom() to an RDD coalesces all elements within each partition into @@ -114,8 +167,38 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This]] extends Serializable /** * Return a new DStream in which each RDD has a single element generated by reducing all - * elements in a window over this DStream. windowDuration and slideDuration are as defined in the - * window() operation. This is equivalent to window(windowDuration, slideDuration).reduce(reduceFunc) + * elements in a sliding window over this DStream. + * @param reduceFunc associative reduce function + * @param windowDuration width of the window; must be a multiple of this DStream's + * batching interval + * @param slideDuration sliding interval of the window (i.e., the interval after which + * the new DStream will generate RDDs); must be a multiple of this + * DStream's batching interval + */ + def reduceByWindow( + reduceFunc: (T, T) => T, + windowDuration: Duration, + slideDuration: Duration + ): DStream[T] = { + dstream.reduceByWindow(reduceFunc, windowDuration, slideDuration) + } + + + /** + * Return a new DStream in which each RDD has a single element generated by reducing all + * elements in a sliding window over this DStream. However, the reduction is done incrementally + * using the old window's reduced value : + * 1. reduce the new values that entered the window (e.g., adding new counts) + * 2. "inverse reduce" the old values that left the window (e.g., subtracting old counts) + * This is more efficient than reduceByWindow without "inverse reduce" function. + * However, it is applicable to only "invertible reduce functions". 
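// For illustration only: a minimal sketch of the incremental form described above,
// assuming a DStream[Long] named amounts. Because addition is invertible, each new
// window is computed from the previous window's value by adding the batches that
// entered and subtracting the batches that left, instead of re-reducing the entire
// window:
//
//   val windowedTotals = amounts.reduceByWindow(_ + _, _ - _, Seconds(30), Seconds(10))
//
// The non-incremental overload, reduceByWindow(_ + _, Seconds(30), Seconds(10)),
// produces the same result but re-reduces every element in the window on each slide.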
+ * @param reduceFunc associative reduce function + * @param invReduceFunc inverse reduce function + * @param windowDuration width of the window; must be a multiple of this DStream's + * batching interval + * @param slideDuration sliding interval of the window (i.e., the interval after which + * the new DStream will generate RDDs); must be a multiple of this + * DStream's batching interval */ def reduceByWindow( reduceFunc: JFunction2[T, T, T], diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala index 048e10b69c..952ca657bf 100644 --- a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala @@ -33,7 +33,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( def cache(): JavaPairDStream[K, V] = dstream.cache() /** Persist RDDs of this DStream with the default storage level (MEMORY_ONLY_SER) */ - def persist(): JavaPairDStream[K, V] = dstream.cache() + def persist(): JavaPairDStream[K, V] = dstream.persist() /** Persist the RDDs of this DStream with the given storage level */ def persist(storageLevel: StorageLevel): JavaPairDStream[K, V] = dstream.persist(storageLevel) @@ -66,14 +66,6 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( def window(windowDuration: Duration, slideDuration: Duration): JavaPairDStream[K, V] = dstream.window(windowDuration, slideDuration) - /** - * Return a new DStream which computed based on tumbling window on this DStream. - * This is equivalent to window(batchDuration, batchDuration). - * @param batchDuration tumbling window duration; must be a multiple of this DStream's interval - */ - def tumble(batchDuration: Duration): JavaPairDStream[K, V] = - dstream.tumble(batchDuration) - /** * Return a new DStream by unifying data of another DStream with this DStream. * @param that Another DStream having the same interval (i.e., slideDuration) as this DStream. @@ -148,23 +140,6 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( dstream.combineByKey(createCombiner, mergeValue, mergeCombiners, partitioner) } - /** - * Return a new DStream by counting the number of values of each key in each RDD. Hash - * partitioning is used to generate the RDDs with Spark's `numPartitions` partitions. - */ - def countByKey(numPartitions: Int): JavaPairDStream[K, JLong] = { - JavaPairDStream.scalaToJavaLong(dstream.countByKey(numPartitions)); - } - - - /** - * Return a new DStream by counting the number of values of each key in each RDD. Hash - * partitioning is used to generate the RDDs with the default number of partitions. - */ - def countByKey(): JavaPairDStream[K, JLong] = { - JavaPairDStream.scalaToJavaLong(dstream.countByKey()); - } - /** * Return a new DStream by applying `groupByKey` over a sliding window. This is similar to * `DStream.groupByKey()` but applies it over a sliding window. The new DStream generates RDDs @@ -402,35 +377,6 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( ) } - /** - * Create a new DStream by counting the number of values for each key over a window. - * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. 
- * @param windowDuration width of the window; must be a multiple of this DStream's - * batching interval - * @param slideDuration sliding interval of the window (i.e., the interval after which - * the new DStream will generate RDDs); must be a multiple of this - * DStream's batching interval - */ - def countByKeyAndWindow(windowDuration: Duration, slideDuration: Duration) - : JavaPairDStream[K, JLong] = { - JavaPairDStream.scalaToJavaLong(dstream.countByKeyAndWindow(windowDuration, slideDuration)) - } - - /** - * Create a new DStream by counting the number of values for each key over a window. - * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. - * @param windowDuration width of the window; must be a multiple of this DStream's - * batching interval - * @param slideDuration sliding interval of the window (i.e., the interval after which - * the new DStream will generate RDDs); must be a multiple of this - * DStream's batching interval - * @param numPartitions Number of partitions of each RDD in the new DStream. - */ - def countByKeyAndWindow(windowDuration: Duration, slideDuration: Duration, numPartitions: Int) - : JavaPairDStream[K, Long] = { - dstream.countByKeyAndWindow(windowDuration, slideDuration, numPartitions) - } - private def convertUpdateStateFunction[S](in: JFunction2[JList[V], Optional[S], Optional[S]]): (Seq[V], Option[S]) => Option[S] = { val scalaFunc: (Seq[V], Option[S]) => Option[S] = (values, state) => { diff --git a/streaming/src/test/java/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/spark/streaming/JavaAPISuite.java index 783a393a8f..7bea0b1fc4 100644 --- a/streaming/src/test/java/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/spark/streaming/JavaAPISuite.java @@ -134,29 +134,6 @@ public class JavaAPISuite implements Serializable { assertOrderInvariantEquals(expected, result); } - @Test - public void testTumble() { - List> inputData = Arrays.asList( - Arrays.asList(1,2,3), - Arrays.asList(4,5,6), - Arrays.asList(7,8,9), - Arrays.asList(10,11,12), - Arrays.asList(13,14,15), - Arrays.asList(16,17,18)); - - List> expected = Arrays.asList( - Arrays.asList(1,2,3,4,5,6), - Arrays.asList(7,8,9,10,11,12), - Arrays.asList(13,14,15,16,17,18)); - - JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); - JavaDStream windowed = stream.tumble(new Duration(2000)); - JavaTestUtils.attachTestOutputStream(windowed); - List> result = JavaTestUtils.runStreams(ssc, 6, 3); - - assertOrderInvariantEquals(expected, result); - } - @Test public void testFilter() { List> inputData = Arrays.asList( @@ -584,24 +561,26 @@ public class JavaAPISuite implements Serializable { } @Test - public void testCountByKey() { - List>> inputData = stringStringKVStream; + public void testCountByValue() { + List> inputData = Arrays.asList( + Arrays.asList("hello", "world"), + Arrays.asList("hello", "moon"), + Arrays.asList("hello")); List>> expected = Arrays.asList( - Arrays.asList( - new Tuple2("california", 2L), - new Tuple2("new york", 2L)), - Arrays.asList( - new Tuple2("california", 2L), - new Tuple2("new york", 2L))); + Arrays.asList( + new Tuple2("hello", 1L), + new Tuple2("world", 1L)), + Arrays.asList( + new Tuple2("hello", 1L), + new Tuple2("moon", 1L)), + Arrays.asList( + new Tuple2("hello", 1L))); - JavaDStream> stream = JavaTestUtils.attachTestInputStream( - ssc, inputData, 1); - JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); - - JavaPairDStream counted = pairStream.countByKey(); + JavaDStream 
stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaPairDStream counted = stream.countByValue(); JavaTestUtils.attachTestOutputStream(counted); - List>> result = JavaTestUtils.runStreams(ssc, 2, 2); + List>> result = JavaTestUtils.runStreams(ssc, 3, 3); Assert.assertEquals(expected, result); } @@ -712,26 +691,28 @@ public class JavaAPISuite implements Serializable { } @Test - public void testCountByKeyAndWindow() { - List>> inputData = stringStringKVStream; + public void testCountByValueAndWindow() { + List> inputData = Arrays.asList( + Arrays.asList("hello", "world"), + Arrays.asList("hello", "moon"), + Arrays.asList("hello")); List>> expected = Arrays.asList( Arrays.asList( - new Tuple2("california", 2L), - new Tuple2("new york", 2L)), + new Tuple2("hello", 1L), + new Tuple2("world", 1L)), Arrays.asList( - new Tuple2("california", 4L), - new Tuple2("new york", 4L)), + new Tuple2("hello", 2L), + new Tuple2("world", 1L), + new Tuple2("moon", 1L)), Arrays.asList( - new Tuple2("california", 2L), - new Tuple2("new york", 2L))); + new Tuple2("hello", 2L), + new Tuple2("moon", 1L))); - JavaDStream> stream = JavaTestUtils.attachTestInputStream( + JavaDStream stream = JavaTestUtils.attachTestInputStream( ssc, inputData, 1); - JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); - JavaPairDStream counted = - pairStream.countByKeyAndWindow(new Duration(2000), new Duration(1000)); + stream.countByValueAndWindow(new Duration(2000), new Duration(1000)); JavaTestUtils.attachTestOutputStream(counted); List>> result = JavaTestUtils.runStreams(ssc, 3, 3); diff --git a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala b/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala index 12388b8887..1e86cf49bb 100644 --- a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala @@ -24,7 +24,7 @@ class BasicOperationsSuite extends TestSuiteBase { ) } - test("flatmap") { + test("flatMap") { val input = Seq(1 to 4, 5 to 8, 9 to 12) testOperation( input, @@ -88,6 +88,23 @@ class BasicOperationsSuite extends TestSuiteBase { ) } + test("count") { + testOperation( + Seq(1 to 1, 1 to 2, 1 to 3, 1 to 4), + (s: DStream[Int]) => s.count(), + Seq(Seq(1L), Seq(2L), Seq(3L), Seq(4L)) + ) + } + + test("countByValue") { + testOperation( + Seq(1 to 1, Seq(1, 1, 1), 1 to 2, Seq(1, 1, 2, 2)), + (s: DStream[Int]) => s.countByValue(), + Seq(Seq((1, 1L)), Seq((1, 3L)), Seq((1, 1L), (2, 1L)), Seq((2, 2L), (1, 2L))), + true + ) + } + test("mapValues") { testOperation( Seq( Seq("a", "a", "b"), Seq("", ""), Seq() ), @@ -206,7 +223,7 @@ class BasicOperationsSuite extends TestSuiteBase { case _ => Option(stateObj) } } - s.map(_ -> 1).updateStateByKey[StateObject](updateFunc).mapValues(_.counter) + s.map(x => (x, 1)).updateStateByKey[StateObject](updateFunc).mapValues(_.counter) } testOperation(inputData, updateStateOperation, outputData, true) diff --git a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala b/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala index e6ac7b35aa..f8380af331 100644 --- a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala @@ -236,14 +236,14 @@ class WindowOperationsSuite extends TestSuiteBase { testOperation(input, operation, expectedOutput, numBatches, true) } - test("countByKeyAndWindow") { - val input = Seq(Seq(("a", 1)), Seq(("b", 
1), ("b", 2)), Seq(("a", 10), ("b", 20))) + test("countByValueAndWindow") { + val input = Seq(Seq("a"), Seq("b", "b"), Seq("a", "b")) val expectedOutput = Seq( Seq(("a", 1)), Seq(("a", 1), ("b", 2)), Seq(("a", 1), ("b", 3))) val windowDuration = Seconds(2) val slideDuration = Seconds(1) val numBatches = expectedOutput.size * (slideDuration / batchDuration).toInt - val operation = (s: DStream[(String, Int)]) => { - s.countByKeyAndWindow(windowDuration, slideDuration).map(x => (x._1, x._2.toInt)) + val operation = (s: DStream[String]) => { + s.countByValueAndWindow(windowDuration, slideDuration).map(x => (x._1, x._2.toInt)) } testOperation(input, operation, expectedOutput, numBatches, true) } From def8126d7788a8bd991ac6f9f9403de701a39dc5 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Thu, 14 Feb 2013 17:49:43 -0800 Subject: [PATCH 287/291] Added TwitterInputDStream from example to StreamingContext. Renamed example TwitterBasic to TwitterPopularTags. --- ...erBasic.scala => TwitterPopularTags.scala} | 33 +++++------- project/SparkBuild.scala | 8 ++- .../spark/streaming/StreamingContext.scala | 52 +++++++++++++------ .../dstream}/TwitterInputDStream.scala | 5 +- 4 files changed, 53 insertions(+), 45 deletions(-) rename examples/src/main/scala/spark/streaming/examples/{twitter/TwitterBasic.scala => TwitterPopularTags.scala} (55%) rename {examples/src/main/scala/spark/streaming/examples/twitter => streaming/src/main/scala/spark/streaming/dstream}/TwitterInputDStream.scala (94%) diff --git a/examples/src/main/scala/spark/streaming/examples/twitter/TwitterBasic.scala b/examples/src/main/scala/spark/streaming/examples/TwitterPopularTags.scala similarity index 55% rename from examples/src/main/scala/spark/streaming/examples/twitter/TwitterBasic.scala rename to examples/src/main/scala/spark/streaming/examples/TwitterPopularTags.scala index 377bc0c98e..fdb3a4c73c 100644 --- a/examples/src/main/scala/spark/streaming/examples/twitter/TwitterBasic.scala +++ b/examples/src/main/scala/spark/streaming/examples/TwitterPopularTags.scala @@ -1,19 +1,19 @@ -package spark.streaming.examples.twitter +package spark.streaming.examples -import spark.streaming.StreamingContext._ import spark.streaming.{Seconds, StreamingContext} +import StreamingContext._ import spark.SparkContext._ -import spark.storage.StorageLevel /** * Calculates popular hashtags (topics) over sliding 10 and 60 second windows from a Twitter * stream. The stream is instantiated with credentials and optionally filters supplied by the * command line arguments. + * */ -object TwitterBasic { +object TwitterPopularTags { def main(args: Array[String]) { if (args.length < 3) { - System.err.println("Usage: TwitterBasic " + + System.err.println("Usage: TwitterPopularTags " + " [filter1] [filter2] ... 
[filter n]") System.exit(1) } @@ -21,10 +21,8 @@ object TwitterBasic { val Array(master, username, password) = args.slice(0, 3) val filters = args.slice(3, args.length) - val ssc = new StreamingContext(master, "TwitterBasic", Seconds(2)) - val stream = new TwitterInputDStream(ssc, username, password, filters, - StorageLevel.MEMORY_ONLY_SER) - ssc.registerInputStream(stream) + val ssc = new StreamingContext(master, "TwitterPopularTags", Seconds(2)) + val stream = ssc.twitterStream(username, password, filters) val hashTags = stream.flatMap(status => status.getText.split(" ").filter(_.startsWith("#"))) @@ -39,22 +37,17 @@ object TwitterBasic { // Print popular hashtags topCounts60.foreach(rdd => { - if (rdd.count() != 0) { - val topList = rdd.take(5) - println("\nPopular topics in last 60 seconds (%s total):".format(rdd.count())) - topList.foreach{case (count, tag) => println("%s (%s tweets)".format(tag, count))} - } + val topList = rdd.take(5) + println("\nPopular topics in last 60 seconds (%s total):".format(rdd.count())) + topList.foreach{case (count, tag) => println("%s (%s tweets)".format(tag, count))} }) topCounts10.foreach(rdd => { - if (rdd.count() != 0) { - val topList = rdd.take(5) - println("\nPopular topics in last 10 seconds (%s total):".format(rdd.count())) - topList.foreach{case (count, tag) => println("%s (%s tweets)".format(tag, count))} - } + val topList = rdd.take(5) + println("\nPopular topics in last 10 seconds (%s total):".format(rdd.count())) + topList.foreach{case (count, tag) => println("%s (%s tweets)".format(tag, count))} }) ssc.start() } - } diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index af8b5ba017..c6d3cc8b15 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -154,10 +154,7 @@ object SparkBuild extends Build { ) def examplesSettings = sharedSettings ++ Seq( - name := "spark-examples", - libraryDependencies ++= Seq( - "org.twitter4j" % "twitter4j-stream" % "3.0.3" - ) + name := "spark-examples" ) def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel") @@ -166,7 +163,8 @@ object SparkBuild extends Build { name := "spark-streaming", libraryDependencies ++= Seq( "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile", - "com.github.sgroschupf" % "zkclient" % "0.1" + "com.github.sgroschupf" % "zkclient" % "0.1", + "org.twitter4j" % "twitter4j-stream" % "3.0.3" ) ) ++ assemblySettings ++ extraAssemblySettings diff --git a/streaming/src/main/scala/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/spark/streaming/StreamingContext.scala index 8cfbec51d2..9be9d884be 100644 --- a/streaming/src/main/scala/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/spark/streaming/StreamingContext.scala @@ -17,6 +17,7 @@ import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.hadoop.fs.Path import java.util.UUID +import twitter4j.Status /** * A StreamingContext is the main entry point for Spark Streaming functionality. Besides the basic @@ -30,14 +31,14 @@ class StreamingContext private ( ) extends Logging { /** - * Creates a StreamingContext using an existing SparkContext. + * Create a StreamingContext using an existing SparkContext. 
* @param sparkContext Existing SparkContext * @param batchDuration The time interval at which streaming data will be divided into batches */ def this(sparkContext: SparkContext, batchDuration: Duration) = this(sparkContext, null, batchDuration) /** - * Creates a StreamingContext by providing the details necessary for creating a new SparkContext. + * Create a StreamingContext by providing the details necessary for creating a new SparkContext. * @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]). * @param frameworkName A name for your job, to display on the cluster web UI * @param batchDuration The time interval at which streaming data will be divided into batches @@ -46,7 +47,7 @@ class StreamingContext private ( this(StreamingContext.createNewSparkContext(master, frameworkName), null, batchDuration) /** - * Re-creates a StreamingContext from a checkpoint file. + * Re-create a StreamingContext from a checkpoint file. * @param path Path either to the directory that was specified as the checkpoint directory, or * to the checkpoint file 'graph' or 'graph.bk'. */ @@ -101,12 +102,12 @@ class StreamingContext private ( protected[streaming] var scheduler: Scheduler = null /** - * Returns the associated Spark context + * Return the associated Spark context */ def sparkContext = sc /** - * Sets each DStreams in this context to remember RDDs it generated in the last given duration. + * Set each DStreams in this context to remember RDDs it generated in the last given duration. * DStreams remember RDDs only for a limited duration of time and releases them for garbage * collection. This method allows the developer to specify how to long to remember the RDDs ( * if the developer wishes to query old data outside the DStream computation). @@ -117,7 +118,7 @@ class StreamingContext private ( } /** - * Sets the context to periodically checkpoint the DStream operations for master + * Set the context to periodically checkpoint the DStream operations for master * fault-tolerance. By default, the graph will be checkpointed every batch interval. * @param directory HDFS-compatible directory where the checkpoint data will be reliably stored * @param interval checkpoint interval @@ -200,7 +201,7 @@ class StreamingContext private ( } /** - * Creates a input stream from a Flume source. + * Create a input stream from a Flume source. * @param hostname Hostname of the slave machine to which the flume data will be sent * @param port Port of the slave machine to which the flume data will be sent * @param storageLevel Storage level to use for storing the received objects @@ -236,7 +237,7 @@ class StreamingContext private ( } /** - * Creates a input stream that monitors a Hadoop-compatible filesystem + * Create a input stream that monitors a Hadoop-compatible filesystem * for new files and reads them using the given key-value types and input format. * File names starting with . are ignored. * @param directory HDFS directory to monitor for new file @@ -255,7 +256,7 @@ class StreamingContext private ( } /** - * Creates a input stream that monitors a Hadoop-compatible filesystem + * Create a input stream that monitors a Hadoop-compatible filesystem * for new files and reads them using the given key-value types and input format. 
* @param directory HDFS directory to monitor for new file * @param filter Function to filter paths to process @@ -274,9 +275,8 @@ class StreamingContext private ( inputStream } - /** - * Creates a input stream that monitors a Hadoop-compatible filesystem + * Create a input stream that monitors a Hadoop-compatible filesystem * for new files and reads them as text files (using key as LongWritable, value * as Text and input format as TextInputFormat). File names starting with . are ignored. * @param directory HDFS directory to monitor for new file @@ -286,7 +286,25 @@ class StreamingContext private ( } /** - * Creates an input stream from a queue of RDDs. In each batch, + * Create a input stream that returns tweets received from Twitter. + * @param username Twitter username + * @param password Twitter password + * @param filters Set of filter strings to get only those tweets that match them + * @param storageLevel Storage level to use for storing the received objects + */ + def twitterStream( + username: String, + password: String, + filters: Seq[String], + storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2 + ): DStream[Status] = { + val inputStream = new TwitterInputDStream(this, username, password, filters, storageLevel) + registerInputStream(inputStream) + inputStream + } + + /** + * Create an input stream from a queue of RDDs. In each batch, * it will process either one or all of the RDDs returned by the queue. * @param queue Queue of RDDs * @param oneAtATime Whether only one RDD should be consumed from the queue in every interval @@ -300,7 +318,7 @@ class StreamingContext private ( } /** - * Creates an input stream from a queue of RDDs. In each batch, + * Create an input stream from a queue of RDDs. In each batch, * it will process either one or all of the RDDs returned by the queue. * @param queue Queue of RDDs * @param oneAtATime Whether only one RDD should be consumed from the queue in every interval @@ -325,7 +343,7 @@ class StreamingContext private ( } /** - * Registers an input stream that will be started (InputDStream.start() called) to get the + * Register an input stream that will be started (InputDStream.start() called) to get the * input data. */ def registerInputStream(inputStream: InputDStream[_]) { @@ -333,7 +351,7 @@ class StreamingContext private ( } /** - * Registers an output stream that will be computed every interval + * Register an output stream that will be computed every interval */ def registerOutputStream(outputStream: DStream[_]) { graph.addOutputStream(outputStream) @@ -351,7 +369,7 @@ class StreamingContext private ( } /** - * Starts the execution of the streams. + * Start the execution of the streams. */ def start() { if (checkpointDir != null && checkpointDuration == null && graph != null) { @@ -379,7 +397,7 @@ class StreamingContext private ( } /** - * Stops the execution of the streams. + * Stop the execution of the streams. 
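// For illustration only: a hypothetical use of the twitterStream method introduced
// above, assuming master, username and password are defined (filters may be empty):
//
//   val ssc = new StreamingContext(master, "TwitterPopularTags", Seconds(2))
//   val tweets = ssc.twitterStream(username, password, Seq("#spark"))
//   val hashTags = tweets.flatMap(status => status.getText.split(" ").filter(_.startsWith("#")))
//
// This replaces constructing a TwitterInputDStream directly and registering it with
// registerInputStream, as done previously in the TwitterPopularTags example.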
*/ def stop() { try { diff --git a/examples/src/main/scala/spark/streaming/examples/twitter/TwitterInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala similarity index 94% rename from examples/src/main/scala/spark/streaming/examples/twitter/TwitterInputDStream.scala rename to streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala index 99ed4cdc1c..d733254ddb 100644 --- a/examples/src/main/scala/spark/streaming/examples/twitter/TwitterInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala @@ -1,4 +1,4 @@ -package spark.streaming.examples.twitter +package spark.streaming.dstream import spark._ import spark.streaming._ @@ -6,7 +6,6 @@ import dstream.{NetworkReceiver, NetworkInputDStream} import storage.StorageLevel import twitter4j._ import twitter4j.auth.BasicAuthorization -import collection.JavaConversions._ /* A stream of Twitter statuses, potentially filtered by one or more keywords. * @@ -50,7 +49,7 @@ class TwitterReceiver( def onTrackLimitationNotice(i: Int) {} def onScrubGeo(l: Long, l1: Long) {} def onStallWarning(stallWarning: StallWarning) {} - def onException(e: Exception) {} + def onException(e: Exception) { stopOnError(e) } }) val query: FilterQuery = new FilterQuery From 4b8402e900c803e64b8a4e2094fd845ccfc9df36 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Thu, 14 Feb 2013 18:10:37 -0800 Subject: [PATCH 288/291] Moved Java streaming examples to examples/src/main/java/spark/streaming/... and fixed logging in NetworkInputTracker to highlight errors when receiver deregisters/shuts down. --- .../spark/streaming/examples/JavaFlumeEventCount.java | 0 .../spark/streaming/examples/JavaNetworkWordCount.java | 0 .../spark/streaming/examples/JavaQueueStream.java | 0 .../src/main/scala/spark/streaming/NetworkInputTracker.scala | 2 +- 4 files changed, 1 insertion(+), 1 deletion(-) rename examples/src/main/{scala => java}/spark/streaming/examples/JavaFlumeEventCount.java (100%) rename examples/src/main/{scala => java}/spark/streaming/examples/JavaNetworkWordCount.java (100%) rename examples/src/main/{scala => java}/spark/streaming/examples/JavaQueueStream.java (100%) diff --git a/examples/src/main/scala/spark/streaming/examples/JavaFlumeEventCount.java b/examples/src/main/java/spark/streaming/examples/JavaFlumeEventCount.java similarity index 100% rename from examples/src/main/scala/spark/streaming/examples/JavaFlumeEventCount.java rename to examples/src/main/java/spark/streaming/examples/JavaFlumeEventCount.java diff --git a/examples/src/main/scala/spark/streaming/examples/JavaNetworkWordCount.java b/examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java similarity index 100% rename from examples/src/main/scala/spark/streaming/examples/JavaNetworkWordCount.java rename to examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java diff --git a/examples/src/main/scala/spark/streaming/examples/JavaQueueStream.java b/examples/src/main/java/spark/streaming/examples/JavaQueueStream.java similarity index 100% rename from examples/src/main/scala/spark/streaming/examples/JavaQueueStream.java rename to examples/src/main/java/spark/streaming/examples/JavaQueueStream.java diff --git a/streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala b/streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala index b54f53b203..ca5f11fdba 100644 --- a/streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala +++ 
b/streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala @@ -86,7 +86,7 @@ class NetworkInputTracker( } case DeregisterReceiver(streamId, msg) => { receiverInfo -= streamId - logInfo("De-registered receiver for network stream " + streamId + logError("De-registered receiver for network stream " + streamId + " with message " + msg) //TODO: Do something about the corresponding NetworkInputDStream } From ddcb976b0d7ce4a76168da33c0e947a5a6b5a255 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Fri, 15 Feb 2013 06:54:47 +0000 Subject: [PATCH 289/291] Made MasterFailureTest more robust. --- .../streaming/util/MasterFailureTest.scala | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala b/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala index 83d8591a3a..776e676063 100644 --- a/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala +++ b/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala @@ -40,6 +40,8 @@ object MasterFailureTest extends Logging { println("\n\n================= UPDATE-STATE-BY-KEY TEST =================\n\n") testUpdateStateByKey(directory, numBatches, batchDuration) + + println("\n\nSUCCESS\n\n") } def testMap(directory: String, numBatches: Int, batchDuration: Duration) { @@ -347,7 +349,8 @@ class FileGeneratingThread(input: Seq[String], testDir: Path, interval: Long) override def run() { val localTestDir = Files.createTempDir() - val fs = testDir.getFileSystem(new Configuration()) + var fs = testDir.getFileSystem(new Configuration()) + val maxTries = 3 try { Thread.sleep(5000) // To make sure that all the streaming context has been set up for (i <- 0 until input.size) { @@ -355,9 +358,24 @@ class FileGeneratingThread(input: Seq[String], testDir: Path, interval: Long) val localFile = new File(localTestDir, (i+1).toString) val hadoopFile = new Path(testDir, (i+1).toString) FileUtils.writeStringToFile(localFile, input(i).toString + "\n") - //fs.moveFromLocalFile(new Path(localFile.toString), new Path(testDir, i.toString)) - fs.copyFromLocalFile(new Path(localFile.toString), hadoopFile) - logInfo("Generated file " + hadoopFile + " at " + System.currentTimeMillis) + var tries = 0 + var done = false + while (!done && tries < maxTries) { + tries += 1 + try { + fs.copyFromLocalFile(new Path(localFile.toString), hadoopFile) + done = true + } catch { + case ioe: IOException => { + fs = testDir.getFileSystem(new Configuration()) + logWarning("Attempt " + tries + " at generating file " + hadoopFile + " failed.", ioe) + } + } + } + if (!done) + logError("Could not generate file " + hadoopFile) + else + logInfo("Generated file " + hadoopFile + " at " + System.currentTimeMillis) Thread.sleep(interval) localFile.delete() } From f98c7da23ef66812b8b4888230ee98c07f09af23 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Sun, 17 Feb 2013 15:06:41 -0800 Subject: [PATCH 290/291] Many changes to ensure better 2nd recovery if 2nd failure happens while recovering from 1st failure - Made the scheduler to checkpoint after clearing old metadata which ensures that a new checkpoint is written as soon as at least one batch gets computed while recovering from a failure. This ensures that if there is a 2nd failure while recovering from 1st failure, the system start 2nd recovery from a newer checkpoint. - Modified Checkpoint writer to write checkpoint in a different thread. 
- Added a check to make sure that compute for InputDStreams gets called only for strictly increasing times. - Changed implementation of slice to call getOrCompute on parent DStream in time-increasing order. - Added testcase to test slice. - Fixed testGroupByKeyAndWindow testcase in JavaAPISuite to verify results with expected output in an order-independent manner. --- .../scala/spark/streaming/Checkpoint.scala | 71 ++++++++++++------- .../main/scala/spark/streaming/DStream.scala | 27 ++++--- .../scala/spark/streaming/DStreamGraph.scala | 13 +++- .../scala/spark/streaming/JobManager.scala | 8 ++- .../scala/spark/streaming/Scheduler.scala | 27 ++++--- .../spark/streaming/StreamingContext.scala | 7 +- .../src/main/scala/spark/streaming/Time.scala | 11 ++- .../api/java/JavaStreamingContext.scala | 7 +- .../streaming/dstream/InputDStream.scala | 36 +++++++++- .../dstream/TwitterInputDStream.scala | 4 +- .../streaming/util/MasterFailureTest.scala | 2 +- .../java/spark/streaming/JavaAPISuite.java | 54 +++++++++----- .../java/spark/streaming/JavaTestUtils.scala | 1 + streaming/src/test/resources/log4j.properties | 4 +- .../streaming/BasicOperationsSuite.scala | 20 ++++++ .../spark/streaming/CheckpointSuite.scala | 5 +- .../scala/spark/streaming/TestSuiteBase.scala | 7 +- .../streaming/WindowOperationsSuite.scala | 5 +- 18 files changed, 210 insertions(+), 99 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/spark/streaming/Checkpoint.scala index b9eb7f8ec4..7405c8b22e 100644 --- a/streaming/src/main/scala/spark/streaming/Checkpoint.scala +++ b/streaming/src/main/scala/spark/streaming/Checkpoint.scala @@ -6,6 +6,8 @@ import org.apache.hadoop.fs.{FileUtil, Path} import org.apache.hadoop.conf.Configuration import java.io._ +import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} +import java.util.concurrent.Executors private[streaming] @@ -38,32 +40,50 @@ class CheckpointWriter(checkpointDir: String) extends Logging { val conf = new Configuration() var fs = file.getFileSystem(conf) val maxAttempts = 3 + val executor = Executors.newFixedThreadPool(1) + + class CheckpointWriteHandler(checkpointTime: Time, bytes: Array[Byte]) extends Runnable { + def run() { + var attempts = 0 + val startTime = System.currentTimeMillis() + while (attempts < maxAttempts) { + attempts += 1 + try { + logDebug("Saving checkpoint for time " + checkpointTime + " to file '" + file + "'") + if (fs.exists(file)) { + val bkFile = new Path(file.getParent, file.getName + ".bk") + FileUtil.copy(fs, file, fs, bkFile, true, true, conf) + logDebug("Moved existing checkpoint file to " + bkFile) + } + val fos = fs.create(file) + fos.write(bytes) + fos.close() + fos.close() + val finishTime = System.currentTimeMillis(); + logInfo("Checkpoint for time " + checkpointTime + " saved to file '" + file + + "', took " + bytes.length + " bytes and " + (finishTime - startTime) + " milliseconds") + return + } catch { + case ioe: IOException => + logWarning("Error writing checkpoint to file in " + attempts + " attempts", ioe) + } + } + logError("Could not write checkpoint for time " + checkpointTime + " to file '" + file + "'") + } + } def write(checkpoint: Checkpoint) { - // TODO: maybe do this in a different thread from the main stream execution thread - var attempts = 0 - while (attempts < maxAttempts) { - attempts += 1 - try { - logDebug("Saving checkpoint for time " + checkpoint.checkpointTime + " to file '" + file + "'") - if (fs.exists(file)) { - val bkFile = new 
Path(file.getParent, file.getName + ".bk") - FileUtil.copy(fs, file, fs, bkFile, true, true, conf) - logDebug("Moved existing checkpoint file to " + bkFile) - } - val fos = fs.create(file) - val oos = new ObjectOutputStream(fos) - oos.writeObject(checkpoint) - oos.close() - logInfo("Checkpoint for time " + checkpoint.checkpointTime + " saved to file '" + file + "'") - fos.close() - return - } catch { - case ioe: IOException => - logWarning("Error writing checkpoint to file in " + attempts + " attempts", ioe) - } - } - logError("Could not write checkpoint for time " + checkpoint.checkpointTime + " to file '" + file + "'") + val bos = new ByteArrayOutputStream() + val zos = new LZFOutputStream(bos) + val oos = new ObjectOutputStream(zos) + oos.writeObject(checkpoint) + oos.close() + bos.close() + executor.execute(new CheckpointWriteHandler(checkpoint.checkpointTime, bos.toByteArray)) + } + + def stop() { + executor.shutdown() } } @@ -85,7 +105,8 @@ object CheckpointReader extends Logging { // of ObjectInputStream is used to explicitly use the current thread's default class // loader to find and load classes. This is a well know Java issue and has popped up // in other places (e.g., http://jira.codehaus.org/browse/GROOVY-1627) - val ois = new ObjectInputStreamWithLoader(fis, Thread.currentThread().getContextClassLoader) + val zis = new LZFInputStream(fis) + val ois = new ObjectInputStreamWithLoader(zis, Thread.currentThread().getContextClassLoader) val cp = ois.readObject.asInstanceOf[Checkpoint] ois.close() fs.close() diff --git a/streaming/src/main/scala/spark/streaming/DStream.scala b/streaming/src/main/scala/spark/streaming/DStream.scala index ce42b742d7..84e4b5bedb 100644 --- a/streaming/src/main/scala/spark/streaming/DStream.scala +++ b/streaming/src/main/scala/spark/streaming/DStream.scala @@ -238,13 +238,15 @@ abstract class DStream[T: ClassManifest] ( dependencies.foreach(_.remember(parentRememberDuration)) } - /** This method checks whether the 'time' is valid wrt slideDuration for generating RDD */ + /** Checks whether the 'time' is valid wrt slideDuration for generating RDD */ protected def isTimeValid(time: Time): Boolean = { if (!isInitialized) { throw new Exception (this + " has not been initialized") } else if (time <= zeroTime || ! 
(time - zeroTime).isMultipleOf(slideDuration)) { + logInfo("Time " + time + " is invalid as zeroTime is " + zeroTime + " and slideDuration is " + slideDuration + " and difference is " + (time - zeroTime)) false } else { + logInfo("Time " + time + " is valid") true } } @@ -627,16 +629,21 @@ abstract class DStream[T: ClassManifest] ( * Return all the RDDs between 'fromTime' to 'toTime' (both included) */ def slice(fromTime: Time, toTime: Time): Seq[RDD[T]] = { - val rdds = new ArrayBuffer[RDD[T]]() - var time = toTime.floor(slideDuration) - while (time >= zeroTime && time >= fromTime) { - getOrCompute(time) match { - case Some(rdd) => rdds += rdd - case None => //throw new Exception("Could not get RDD for time " + time) - } - time -= slideDuration + if (!(fromTime - zeroTime).isMultipleOf(slideDuration)) { + logWarning("fromTime (" + fromTime + ") is not a multiple of slideDuration (" + slideDuration + ")") } - rdds.toSeq + if (!(toTime - zeroTime).isMultipleOf(slideDuration)) { + logWarning("toTime (" + fromTime + ") is not a multiple of slideDuration (" + slideDuration + ")") + } + val alignedToTime = toTime.floor(slideDuration) + val alignedFromTime = fromTime.floor(slideDuration) + + logInfo("Slicing from " + fromTime + " to " + toTime + + " (aligned to " + alignedFromTime + " and " + alignedToTime + ")") + + alignedFromTime.to(alignedToTime, slideDuration).flatMap(time => { + if (time >= zeroTime) getOrCompute(time) else None + }) } /** diff --git a/streaming/src/main/scala/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/spark/streaming/DStreamGraph.scala index 22d9e24f05..adb7f3a24d 100644 --- a/streaming/src/main/scala/spark/streaming/DStreamGraph.scala +++ b/streaming/src/main/scala/spark/streaming/DStreamGraph.scala @@ -86,10 +86,12 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { def getOutputStreams() = this.synchronized { outputStreams.toArray } - def generateRDDs(time: Time): Seq[Job] = { + def generateJobs(time: Time): Seq[Job] = { this.synchronized { - logInfo("Generating RDDs for time " + time) - outputStreams.flatMap(outputStream => outputStream.generateJob(time)) + logInfo("Generating jobs for time " + time) + val jobs = outputStreams.flatMap(outputStream => outputStream.generateJob(time)) + logInfo("Generated " + jobs.length + " jobs for time " + time) + jobs } } @@ -97,18 +99,23 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { this.synchronized { logInfo("Clearing old metadata for time " + time) outputStreams.foreach(_.clearOldMetadata(time)) + logInfo("Cleared old metadata for time " + time) } } def updateCheckpointData(time: Time) { this.synchronized { + logInfo("Updating checkpoint data for time " + time) outputStreams.foreach(_.updateCheckpointData(time)) + logInfo("Updated checkpoint data for time " + time) } } def restoreCheckpointData() { this.synchronized { + logInfo("Restoring checkpoint data") outputStreams.foreach(_.restoreCheckpointData()) + logInfo("Restored checkpoint data") } } diff --git a/streaming/src/main/scala/spark/streaming/JobManager.scala b/streaming/src/main/scala/spark/streaming/JobManager.scala index 649494ff4a..7696c4a592 100644 --- a/streaming/src/main/scala/spark/streaming/JobManager.scala +++ b/streaming/src/main/scala/spark/streaming/JobManager.scala @@ -43,20 +43,24 @@ class JobManager(ssc: StreamingContext, numThreads: Int = 1) extends Logging { } private def clearJob(job: Job) { + var timeCleared = false + val time = job.time jobs.synchronized { - val time 
= job.time val jobsOfTime = jobs.get(time) if (jobsOfTime.isDefined) { jobsOfTime.get -= job if (jobsOfTime.get.isEmpty) { - ssc.scheduler.clearOldMetadata(time) jobs -= time + timeCleared = true } } else { throw new Exception("Job finished for time " + job.time + " but time does not exist in jobs") } } + if (timeCleared) { + ssc.scheduler.clearOldMetadata(time) + } } def getPendingTimes(): Array[Time] = { diff --git a/streaming/src/main/scala/spark/streaming/Scheduler.scala b/streaming/src/main/scala/spark/streaming/Scheduler.scala index 57d494da83..1c4b22a898 100644 --- a/streaming/src/main/scala/spark/streaming/Scheduler.scala +++ b/streaming/src/main/scala/spark/streaming/Scheduler.scala @@ -20,8 +20,9 @@ class Scheduler(ssc: StreamingContext) extends Logging { val clockClass = System.getProperty("spark.streaming.clock", "spark.streaming.util.SystemClock") val clock = Class.forName(clockClass).newInstance().asInstanceOf[Clock] val timer = new RecurringTimer(clock, ssc.graph.batchDuration.milliseconds, - longTime => generateRDDs(new Time(longTime))) + longTime => generateJobs(new Time(longTime))) val graph = ssc.graph + var latestTime: Time = null def start() = synchronized { if (ssc.isCheckpointPresent) { @@ -35,6 +36,7 @@ class Scheduler(ssc: StreamingContext) extends Logging { def stop() = synchronized { timer.stop() jobManager.stop() + if (checkpointWriter != null) checkpointWriter.stop() ssc.graph.stop() logInfo("Scheduler stopped") } @@ -73,35 +75,38 @@ class Scheduler(ssc: StreamingContext) extends Logging { val timesToReschedule = (pendingTimes ++ downTimes).distinct.sorted(Time.ordering) logInfo("Batches to reschedule: " + timesToReschedule.mkString(", ")) timesToReschedule.foreach(time => - graph.generateRDDs(time).foreach(jobManager.runJob) + graph.generateJobs(time).foreach(jobManager.runJob) ) // Restart the timer timer.start(restartTime.milliseconds) - logInfo("Scheduler's timer restarted") + logInfo("Scheduler's timer restarted at " + restartTime) } - /** Generates the RDDs, clears old metadata and does checkpoint for the given time */ - def generateRDDs(time: Time) { + /** Generate jobs and perform checkpoint for the given `time`. */ + def generateJobs(time: Time) { SparkEnv.set(ssc.env) logInfo("\n-----------------------------------------------------\n") - graph.generateRDDs(time).foreach(jobManager.runJob) + graph.generateJobs(time).foreach(jobManager.runJob) + latestTime = time doCheckpoint(time) } - + /** + * Clear old metadata assuming jobs of `time` have finished processing. + * And also perform checkpoint. + */ def clearOldMetadata(time: Time) { ssc.graph.clearOldMetadata(time) + doCheckpoint(time) } - def doCheckpoint(time: Time) { + /** Perform checkpoint for the give `time`. 
*/ + def doCheckpoint(time: Time) = synchronized { if (ssc.checkpointDuration != null && (time - graph.zeroTime).isMultipleOf(ssc.checkpointDuration)) { logInfo("Checkpointing graph for time " + time) - val startTime = System.currentTimeMillis() ssc.graph.updateCheckpointData(time) checkpointWriter.write(new Checkpoint(ssc, time)) - val stopTime = System.currentTimeMillis() - logInfo("Checkpointing the graph took " + (stopTime - startTime) + " ms") } } } diff --git a/streaming/src/main/scala/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/spark/streaming/StreamingContext.scala index 9be9d884be..d1407b7869 100644 --- a/streaming/src/main/scala/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/spark/streaming/StreamingContext.scala @@ -119,18 +119,15 @@ class StreamingContext private ( /** * Set the context to periodically checkpoint the DStream operations for master - * fault-tolerance. By default, the graph will be checkpointed every batch interval. + * fault-tolerance. The graph will be checkpointed every batch interval. * @param directory HDFS-compatible directory where the checkpoint data will be reliably stored - * @param interval checkpoint interval */ - def checkpoint(directory: String, interval: Duration = null) { + def checkpoint(directory: String) { if (directory != null) { sc.setCheckpointDir(StreamingContext.getSparkCheckpointDir(directory)) checkpointDir = directory - checkpointDuration = interval } else { checkpointDir = null - checkpointDuration = null } } diff --git a/streaming/src/main/scala/spark/streaming/Time.scala b/streaming/src/main/scala/spark/streaming/Time.scala index 8201e84a20..f14decf08b 100644 --- a/streaming/src/main/scala/spark/streaming/Time.scala +++ b/streaming/src/main/scala/spark/streaming/Time.scala @@ -38,15 +38,14 @@ case class Time(private val millis: Long) { def max(that: Time): Time = if (this > that) this else that def until(that: Time, interval: Duration): Seq[Time] = { - assert(that > this, "Cannot create sequence as " + that + " not more than " + this) - assert( - (that - this).isMultipleOf(interval), - "Cannot create sequence as gap between " + that + " and " + - this + " is not multiple of " + interval - ) (this.milliseconds) until (that.milliseconds) by (interval.milliseconds) map (new Time(_)) } + def to(that: Time, interval: Duration): Seq[Time] = { + (this.milliseconds) to (that.milliseconds) by (interval.milliseconds) map (new Time(_)) + } + + override def toString: String = (millis.toString + " ms") } diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala index 5bbf2b084f..03933aae93 100644 --- a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala @@ -314,12 +314,11 @@ class JavaStreamingContext(val ssc: StreamingContext) { /** * Sets the context to periodically checkpoint the DStream operations for master - * fault-tolerance. By default, the graph will be checkpointed every batch interval. + * fault-tolerance. The graph will be checkpointed every batch interval. 
* @param directory HDFS-compatible directory where the checkpoint data will be reliably stored - * @param interval checkpoint interval */ - def checkpoint(directory: String, interval: Duration = null) { - ssc.checkpoint(directory, interval) + def checkpoint(directory: String) { + ssc.checkpoint(directory) } /** diff --git a/streaming/src/main/scala/spark/streaming/dstream/InputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/InputDStream.scala index 980ca5177e..a4db44a608 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/InputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/InputDStream.scala @@ -1,10 +1,42 @@ package spark.streaming.dstream -import spark.streaming.{Duration, StreamingContext, DStream} +import spark.streaming.{Time, Duration, StreamingContext, DStream} +/** + * This is the abstract base class for all input streams. This class provides to methods + * start() and stop() which called by the scheduler to start and stop receiving data/ + * Input streams that can generated RDDs from new data just by running a service on + * the driver node (that is, without running a receiver onworker nodes) can be + * implemented by directly subclassing this InputDStream. For example, + * FileInputDStream, a subclass of InputDStream, monitors a HDFS directory for + * new files and generates RDDs on the new files. For implementing input streams + * that requires running a receiver on the worker nodes, use NetworkInputDStream + * as the parent class. + */ abstract class InputDStream[T: ClassManifest] (@transient ssc_ : StreamingContext) extends DStream[T](ssc_) { + var lastValidTime: Time = null + + /** + * Checks whether the 'time' is valid wrt slideDuration for generating RDD. + * Additionally it also ensures valid times are in strictly increasing order. + * This ensures that InputDStream.compute() is called strictly on increasing + * times. + */ + override protected def isTimeValid(time: Time): Boolean = { + if (!super.isTimeValid(time)) { + false // Time not valid + } else { + // Time is valid, but check it it is more than lastValidTime + if (lastValidTime == null || lastValidTime <= time) { + logWarning("isTimeValid called with " + time + " where as last valid time is " + lastValidTime) + } + lastValidTime = time + true + } + } + override def dependencies = List() override def slideDuration: Duration = { @@ -13,7 +45,9 @@ abstract class InputDStream[T: ClassManifest] (@transient ssc_ : StreamingContex ssc.graph.batchDuration } + /** Method called to start receiving data. Subclasses must implement this method. */ def start() + /** Method called to stop receiving data. Subclasses must implement this method. 
*/ def stop() } diff --git a/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala index d733254ddb..e70822e5c3 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala @@ -2,8 +2,8 @@ package spark.streaming.dstream import spark._ import spark.streaming._ -import dstream.{NetworkReceiver, NetworkInputDStream} import storage.StorageLevel + import twitter4j._ import twitter4j.auth.BasicAuthorization @@ -19,7 +19,7 @@ class TwitterInputDStream( password: String, filters: Seq[String], storageLevel: StorageLevel - ) extends NetworkInputDStream[Status](ssc_) { + ) extends NetworkInputDStream[Status](ssc_) { override def createReceiver(): NetworkReceiver[Status] = { new TwitterReceiver(username, password, filters, storageLevel) diff --git a/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala b/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala index 776e676063..bdd9f4d753 100644 --- a/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala +++ b/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala @@ -315,7 +315,7 @@ class KillingThread(ssc: StreamingContext, maxKillWaitTime: Long) extends Thread override def run() { try { // If it is the first killing, then allow the first checkpoint to be created - var minKillWaitTime = if (MasterFailureTest.killCount == 0) 5000 else 1000 + var minKillWaitTime = if (MasterFailureTest.killCount == 0) 5000 else 2000 val killWaitTime = minKillWaitTime + math.abs(Random.nextLong % maxKillWaitTime) logInfo("Kill wait time = " + killWaitTime) Thread.sleep(killWaitTime) diff --git a/streaming/src/test/java/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/spark/streaming/JavaAPISuite.java index 7bea0b1fc4..16bacffb92 100644 --- a/streaming/src/test/java/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/spark/streaming/JavaAPISuite.java @@ -23,6 +23,7 @@ import spark.streaming.JavaCheckpointTestUtils; import spark.streaming.dstream.KafkaPartitionKey; import java.io.*; +import java.text.Collator; import java.util.*; // The test suite itself is Serializable so that anonymous Function implementations can be @@ -35,7 +36,7 @@ public class JavaAPISuite implements Serializable { public void setUp() { System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock"); ssc = new JavaStreamingContext("local[2]", "test", new Duration(1000)); - ssc.checkpoint("checkpoint", new Duration(1000)); + ssc.checkpoint("checkpoint"); } @After @@ -587,26 +588,47 @@ public class JavaAPISuite implements Serializable { @Test public void testGroupByKeyAndWindow() { - List>> inputData = stringStringKVStream; + List>> inputData = stringIntKVStream; - List>>> expected = Arrays.asList( - Arrays.asList(new Tuple2>("california", Arrays.asList("dodgers", "giants")), - new Tuple2>("new york", Arrays.asList("yankees", "mets"))), - Arrays.asList(new Tuple2>("california", - Arrays.asList("sharks", "ducks", "dodgers", "giants")), - new Tuple2>("new york", Arrays.asList("rangers", "islanders", "yankees", "mets"))), - Arrays.asList(new Tuple2>("california", Arrays.asList("sharks", "ducks")), - new Tuple2>("new york", Arrays.asList("rangers", "islanders")))); + List>>> expected = Arrays.asList( + Arrays.asList( + new Tuple2>("california", Arrays.asList(1, 3)), + new Tuple2>("new york", 
Arrays.asList(1, 4)) + ), + Arrays.asList( + new Tuple2>("california", Arrays.asList(1, 3, 5, 5)), + new Tuple2>("new york", Arrays.asList(1, 1, 3, 4)) + ), + Arrays.asList( + new Tuple2>("california", Arrays.asList(5, 5)), + new Tuple2>("new york", Arrays.asList(1, 3)) + ) + ); - JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); - JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); - JavaPairDStream> groupWindowed = + JavaPairDStream> groupWindowed = pairStream.groupByKeyAndWindow(new Duration(2000), new Duration(1000)); JavaTestUtils.attachTestOutputStream(groupWindowed); - List>>> result = JavaTestUtils.runStreams(ssc, 3, 3); + List>>> result = JavaTestUtils.runStreams(ssc, 3, 3); - Assert.assertEquals(expected, result); + assert(result.size() == expected.size()); + for (int i = 0; i < result.size(); i++) { + assert(convert(result.get(i)).equals(convert(expected.get(i)))); + } + } + + private HashSet>> convert(List>> listOfTuples) { + List>> newListOfTuples = new ArrayList>>(); + for (Tuple2> tuple: listOfTuples) { + newListOfTuples.add(convert(tuple)); + } + return new HashSet>>(newListOfTuples); + } + + private Tuple2> convert(Tuple2> tuple) { + return new Tuple2>(tuple._1(), new HashSet(tuple._2())); } @Test @@ -894,7 +916,7 @@ public class JavaAPISuite implements Serializable { Arrays.asList(8,7)); File tempDir = Files.createTempDir(); - ssc.checkpoint(tempDir.getAbsolutePath(), new Duration(1000)); + ssc.checkpoint(tempDir.getAbsolutePath()); JavaDStream stream = JavaCheckpointTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream letterCount = stream.map(new Function() { diff --git a/streaming/src/test/java/spark/streaming/JavaTestUtils.scala b/streaming/src/test/java/spark/streaming/JavaTestUtils.scala index 56349837e5..52ea28732a 100644 --- a/streaming/src/test/java/spark/streaming/JavaTestUtils.scala +++ b/streaming/src/test/java/spark/streaming/JavaTestUtils.scala @@ -57,6 +57,7 @@ trait JavaTestBase extends TestSuiteBase { } object JavaTestUtils extends JavaTestBase { + override def maxWaitTimeMillis = 20000 } diff --git a/streaming/src/test/resources/log4j.properties b/streaming/src/test/resources/log4j.properties index f0638e0e02..59c445e63f 100644 --- a/streaming/src/test/resources/log4j.properties +++ b/streaming/src/test/resources/log4j.properties @@ -1,5 +1,5 @@ # Set everything to be logged to the file streaming/target/unit-tests.log -log4j.rootCategory=WARN, file +log4j.rootCategory=INFO, file # log4j.appender.file=org.apache.log4j.FileAppender log4j.appender.file=org.apache.log4j.FileAppender log4j.appender.file.append=false @@ -9,6 +9,4 @@ log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: # Ignore messages below warning level from Jetty, because it's a bit verbose log4j.logger.org.eclipse.jetty=WARN -log4j.logger.spark.streaming=INFO -log4j.logger.spark.streaming.dstream.FileInputDStream=DEBUG diff --git a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala b/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala index 1e86cf49bb..8fce91853c 100644 --- a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala @@ -229,6 +229,26 @@ class BasicOperationsSuite extends TestSuiteBase { testOperation(inputData, 
updateStateOperation, outputData, true) } + test("slice") { + val ssc = new StreamingContext("local[2]", "BasicOperationSuite", Seconds(1)) + val input = Seq(Seq(1), Seq(2), Seq(3), Seq(4)) + val stream = new TestInputStream[Int](ssc, input, 2) + ssc.registerInputStream(stream) + stream.foreach(_ => {}) // Dummy output stream + ssc.start() + Thread.sleep(2000) + def getInputFromSlice(fromMillis: Long, toMillis: Long) = { + stream.slice(new Time(fromMillis), new Time(toMillis)).flatMap(_.collect()).toSet + } + + assert(getInputFromSlice(0, 1000) == Set(1)) + assert(getInputFromSlice(0, 2000) == Set(1, 2)) + assert(getInputFromSlice(1000, 2000) == Set(1, 2)) + assert(getInputFromSlice(2000, 4000) == Set(2, 3, 4)) + ssc.stop() + Thread.sleep(1000) + } + test("forgetting of RDDs - map and window operations") { assert(batchDuration === Seconds(1), "Batch duration has changed from 1 second") diff --git a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala index c89c4a8d43..5250667bcb 100644 --- a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala +++ b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala @@ -39,14 +39,11 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { override def batchDuration = Milliseconds(500) - override def checkpointInterval = batchDuration - override def actuallyWait = true test("basic rdd checkpoints + dstream graph checkpoint recovery") { assert(batchDuration === Milliseconds(500), "batchDuration for this test must be 1 second") - assert(checkpointInterval === batchDuration, "checkpointInterval for this test much be same as batchDuration") System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock") @@ -188,7 +185,7 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { // Set up the streaming context and input streams val testDir = Files.createTempDir() var ssc = new StreamingContext(master, framework, Seconds(1)) - ssc.checkpoint(checkpointDir, checkpointInterval) + ssc.checkpoint(checkpointDir) val fileStream = ssc.textFileStream(testDir.toString) // Making value 3 take large time to process, to ensure that the master // shuts down in the middle of processing the 3rd batch diff --git a/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala index 2cc31d6137..ad6aa79d10 100644 --- a/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala +++ b/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala @@ -75,9 +75,6 @@ trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { // Directory where the checkpoint data will be saved def checkpointDir = "checkpoint" - // Duration after which the graph is checkpointed - def checkpointInterval = batchDuration - // Number of partitions of the input parallel collections created for testing def numInputPartitions = 2 @@ -99,7 +96,7 @@ trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { // Create StreamingContext val ssc = new StreamingContext(master, framework, batchDuration) if (checkpointDir != null) { - ssc.checkpoint(checkpointDir, checkpointInterval) + ssc.checkpoint(checkpointDir) } // Setup the stream computation @@ -124,7 +121,7 @@ trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { // Create StreamingContext val ssc = new StreamingContext(master, framework, batchDuration) if (checkpointDir != null) { - ssc.checkpoint(checkpointDir, 
checkpointInterval) + ssc.checkpoint(checkpointDir) } // Setup the stream computation diff --git a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala b/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala index f8380af331..1b66f3bda2 100644 --- a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala +++ b/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala @@ -273,6 +273,7 @@ class WindowOperationsSuite extends TestSuiteBase { slideDuration: Duration = Seconds(1) ) { test("reduceByKeyAndWindow - " + name) { + logInfo("reduceByKeyAndWindow - " + name) val numBatches = expectedOutput.size * (slideDuration / batchDuration).toInt val operation = (s: DStream[(String, Int)]) => { s.reduceByKeyAndWindow((x: Int, y: Int) => x + y, windowDuration, slideDuration) @@ -288,7 +289,8 @@ class WindowOperationsSuite extends TestSuiteBase { windowDuration: Duration = Seconds(2), slideDuration: Duration = Seconds(1) ) { - test("ReduceByKeyAndWindow with inverse function - " + name) { + test("reduceByKeyAndWindow with inverse function - " + name) { + logInfo("reduceByKeyAndWindow with inverse function - " + name) val numBatches = expectedOutput.size * (slideDuration / batchDuration).toInt val operation = (s: DStream[(String, Int)]) => { s.reduceByKeyAndWindow(_ + _, _ - _, windowDuration, slideDuration) @@ -306,6 +308,7 @@ class WindowOperationsSuite extends TestSuiteBase { slideDuration: Duration = Seconds(1) ) { test("reduceByKeyAndWindow with inverse and filter functions - " + name) { + logInfo("reduceByKeyAndWindow with inverse and filter functions - " + name) val numBatches = expectedOutput.size * (slideDuration / batchDuration).toInt val filterFunc = (p: (String, Int)) => p._2 != 0 val operation = (s: DStream[(String, Int)]) => { From 8ad561dc7d6475d7b217ec3f57bac3b584fed31a Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Mon, 18 Feb 2013 02:12:41 -0800 Subject: [PATCH 291/291] Added checkpointing and fault-tolerance semantics to the programming guide. Fixed default checkpoint interval to being a multiple of slide duration. Fixed visibility of some classes and objects to clean up docs. --- docs/python-programming-guide.md | 2 +- docs/streaming-programming-guide.md | 252 ++++++++++++++---- .../main/scala/spark/streaming/DStream.scala | 2 +- .../main/scala/spark/streaming/Duration.scala | 2 +- .../main/scala/spark/streaming/Interval.scala | 1 + .../streaming/PairDStreamFunctions.scala | 8 +- .../dstream/TwitterInputDStream.scala | 2 + .../spark/streaming/CheckpointSuite.scala | 2 +- 8 files changed, 209 insertions(+), 62 deletions(-) diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md index 4e84d23edf..2012241a6a 100644 --- a/docs/python-programming-guide.md +++ b/docs/python-programming-guide.md @@ -87,7 +87,7 @@ By default, the `pyspark` shell creates SparkContext that runs jobs locally. To connect to a non-local cluster, set the `MASTER` environment variable. 
For example, to use the `pyspark` shell with a [standalone Spark cluster](spark-standalone.html): -{% highlight shell %} +{% highlight bash %} $ MASTER=spark://IP:PORT ./pyspark {% endhighlight %} diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index b6da7af654..d408e80359 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -34,8 +34,8 @@ The StreamingContext is used to creating InputDStreams from input sources: {% highlight scala %} // Assuming ssc is the StreamingContext -ssc.networkStream(hostname, port) // Creates a stream that uses a TCP socket to read data from hostname:port -ssc.textFileStream(directory) // Creates a stream by monitoring and processing new files in a HDFS directory +ssc.networkStream(hostname, port) // Creates a stream that uses a TCP socket to read data from hostname:port +ssc.textFileStream(directory) // Creates a stream by monitoring and processing new files in a HDFS directory {% endhighlight %} A complete list of input sources is available in the [StreamingContext API documentation](api/streaming/index.html#spark.streaming.StreamingContext). Data received from these sources can be processed using DStream operations, which are explained next. @@ -50,18 +50,18 @@ Once an input DStream has been created, you can transform it using _DStream oper DStreams support many of the transformations available on normal Spark RDD's: - + - + - + - + @@ -70,73 +70,92 @@ DStreams support many of the transformations available on normal Spark RDD's: - + + + + + + + + + + + + + - - + - + - - - - - + + + + + +
 Transformation | Meaning
-map(func) | Returns a new DStream formed by passing each element of the source through a function func.
+map(func) | Returns a new DStream formed by passing each element of the source DStream through a function func.
-filter(func) | Returns a new stream formed by selecting those elements of the source on which func returns true.
+filter(func) | Returns a new DStream formed by selecting those elements of the source DStream on which func returns true.
 flatMap(func) | Similar to map, but each input item can be mapped to 0 or more output items (so func should return a Seq rather than a single item).
 mapPartitions(func) | Similar to map, but runs separately on each partition (block) of the DStream.
-union(otherStream) | Return a new stream that contains the union of the elements in the source stream and the argument.
+union(otherStream) | Return a new DStream that contains the union of the elements in the source DStream and the argument DStream.
+count() | Returns a new DStream of single-element RDDs by counting the number of elements in each RDD of the source DStream.
+reduce(func) | Returns a new DStream of single-element RDDs by aggregating the elements in each RDD of the source DStream using a function func (which takes two arguments and returns one). The function should be associative so that it can be computed in parallel.
+countByValue() | When called on a DStream of elements of type K, returns a new DStream of (K, Long) pairs where the value of each key is its frequency in each RDD of the source DStream.
-groupByKey([numTasks]) | When called on a stream of (K, V) pairs, returns a stream of (K, Seq[V]) pairs. Note: By default, this uses only 8 parallel tasks to do the grouping. You can pass an optional numTasks argument to set a different number of tasks.
+groupByKey([numTasks]) | When called on a DStream of (K, V) pairs, returns a new DStream of (K, Seq[V]) pairs by grouping together all the values of each key in the RDDs of the source DStream. Note: By default, this uses Spark's default number of parallel tasks (2 for local machine, 8 for a cluster) to do the grouping. You can pass an optional numTasks argument to set a different number of tasks.
-reduceByKey(func, [numTasks]) | When called on a stream of (K, V) pairs, returns a stream of (K, V) pairs where the values for each key are aggregated using the given reduce function. Like in groupByKey, the number of reduce tasks is configurable through an optional second argument.
+reduceByKey(func, [numTasks]) | When called on a DStream of (K, V) pairs, returns a new DStream of (K, V) pairs where the values for each key are aggregated using the given reduce function. Like in groupByKey, the number of reduce tasks is configurable through an optional second argument.
-join(otherStream, [numTasks]) | When called on streams of type (K, V) and (K, W), returns a stream of (K, (V, W)) pairs with all pairs of elements for each key.
+join(otherStream, [numTasks]) | When called on two DStreams of (K, V) and (K, W) pairs, returns a new DStream of (K, (V, W)) pairs with all pairs of elements for each key.
-cogroup(otherStream, [numTasks]) | When called on DStream of type (K, V) and (K, W), returns a DStream of (K, Seq[V], Seq[W]) tuples.
+cogroup(otherStream, [numTasks]) | When called on DStream of (K, V) and (K, W) pairs, returns a new DStream of (K, Seq[V], Seq[W]) tuples.
-reduce(func) | Returns a new DStream of single-element RDDs by aggregating the elements of the stream using a function func (which takes two arguments and returns one). The function should be associative so that it can be computed correctly in parallel.
+transform(func) | Returns a new DStream by applying func (a RDD-to-RDD function) to every RDD of the stream. This can be used to do arbitrary RDD operations on the DStream.
+updateStateByKey(func) | Return a new "state" DStream where the state for each key is updated by applying the given function on the previous state of the key and the new values of each key. This can be used to track session state by using the session-id as the key and updating the session state as new data is received.
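
To make the composition of these transformations concrete, here is a minimal sketch of a running word count. It assumes `lines` is a `DStream[String]` obtained from one of the input sources shown earlier and that `spark.streaming.StreamingContext._` has been imported so the pair operations are available; all variable names are placeholders.

{% highlight scala %}
// Minimal sketch: count words per batch, then keep a running total per word.
val words = lines.flatMap(_.split(" "))          // one element per word
val pairs = words.map(word => (word, 1))         // (word, 1) pairs
val wordCounts = pairs.reduceByKey(_ + _)        // counts within each batch

// updateStateByKey keeps per-key state across batches; it needs a checkpoint
// directory to be set on the context (see the checkpointing section below).
val runningCounts = wordCounts.updateStateByKey[Int] {
  (newValues: Seq[Int], state: Option[Int]) => Some(newValues.sum + state.getOrElse(0))
}
runningCounts.print()
{% endhighlight %}

Here `wordCounts` gives the counts seen in each individual batch, while `runningCounts` accumulates them across the lifetime of the stream.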
-Spark Streaming features windowed computations, which allow you to report statistics over a sliding window of data. All window functions take a windowDuration, which represents the width of the window and a slideTime, which represents the frequency during which the window is calculated.
+Spark Streaming features windowed computations, which allow you to apply transformations over a sliding window of data. All window functions take a windowDuration, which represents the width of the window and a slideTime, which represents the frequency during which the window is calculated.

 Transformation | Meaning
-window(windowDuration, slideTime) | Return a new stream which is computed based on windowed batches of the source stream. windowDuration is the width of the window and slideTime is the frequency during which the window is calculated. Both times must be multiples of the batch interval.
+window(windowDuration, slideDuration) | Return a new DStream which is computed based on windowed batches of the source DStream. windowDuration is the width of the window and slideTime is the frequency during which the window is calculated. Both times must be multiples of the batch interval.
-countByWindow(windowDuration, slideTime) | Return a sliding count of elements in the stream. windowDuration and slideDuration are exactly as defined in window().
+countByWindow(windowDuration, slideDuration) | Return a sliding count of elements in the stream. windowDuration and slideDuration are exactly as defined in window().
 reduceByWindow(func, windowDuration, slideDuration) | Return a new single-element stream, created by aggregating elements in the stream over a sliding interval using func. The function should be associative so that it can be computed correctly in parallel. windowDuration and slideDuration are exactly as defined in window().
-groupByKeyAndWindow(windowDuration, slideDuration, [numTasks]) | When called on a stream of (K, V) pairs, returns a stream of (K, Seq[V]) pairs over a sliding window. Note: By default, this uses only 8 parallel tasks to do the grouping. You can pass an optional numTasks argument to set a different number of tasks. windowDuration and slideDuration are exactly as defined in window().
+groupByKeyAndWindow(windowDuration, slideDuration, [numTasks]) | When called on a DStream of (K, V) pairs, returns a new DStream of (K, Seq[V]) pairs by grouping together values of each key over batches in a sliding window. Note: By default, this uses Spark's default number of parallel tasks (2 for local machine, 8 for a cluster) to do the grouping. You can pass an optional numTasks argument to set a different number of tasks.
+reduceByKeyAndWindow(func, windowDuration, slideDuration, [numTasks]) | When called on a DStream of (K, V) pairs, returns a new DStream of (K, V) pairs where the values for each key are aggregated using the given reduce function func over batches in a sliding window. Like in groupByKeyAndWindow, the number of reduce tasks is configurable through an optional second argument. windowDuration and slideDuration are exactly as defined in window().
+reduceByKeyAndWindow(func, invFunc, windowDuration, slideDuration, [numTasks]) | A more efficient version of the above reduceByKeyAndWindow() where the reduce value of each window is calculated incrementally using the reduce values of the previous window. This is done by reducing the new data that enter the sliding window, and "inverse reducing" the old data that leave the window. An example would be that of "adding" and "subtracting" counts of keys as the window slides. However, it is applicable to only "invertible reduce functions", that is, those reduce functions which have a corresponding "inverse reduce" function (taken as parameter invFunc). Like in groupByKeyAndWindow, the number of reduce tasks is configurable through an optional second argument. windowDuration and slideDuration are exactly as defined in window().
-reduceByKeyAndWindow(func, [numTasks]) | When called on a stream of (K, V) pairs, returns a stream of (K, V) pairs where the values for each key are aggregated using the given reduce function over batches within a sliding window. Like in groupByKeyAndWindow, the number of reduce tasks is configurable through an optional second argument.
+countByValueAndWindow(windowDuration, slideDuration, [numTasks]) | When called on a DStream of (K, V) pairs, returns a new DStream of (K, Long) pairs where the value of each key is its frequency within a sliding window. Like in groupByKeyAndWindow, the number of reduce tasks is configurable through an optional second argument. windowDuration and slideDuration are exactly as defined in window().
-countByKeyAndWindow([numTasks]) | When called on a stream of (K, V) pairs, returns a stream of (K, Int) pairs where the values for each key are the count within a sliding window. Like in countByKeyAndWindow, the number of reduce tasks is configurable through an optional second argument. windowDuration and slideDuration are exactly as defined in window().
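
As a sketch of how the windowed variants are used, the snippet below maintains word counts over the last 30 seconds of data, sliding every 10 seconds. It assumes `pairs` is a `DStream[(String, Int)]` of (word, 1) records and that a checkpoint directory has been set on the context, since the inverse-function form requires it.

{% highlight scala %}
// Minimal sketch of the incremental (inverse-function) windowed reduce.
val windowedCounts = pairs.reduceByKeyAndWindow(
  (a: Int, b: Int) => a + b,   // reduce the new data entering the window
  (a: Int, b: Int) => a - b,   // "inverse reduce" the old data leaving the window
  Seconds(30),                 // windowDuration
  Seconds(10)                  // slideDuration
)
windowedCounts.print()
{% endhighlight %}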
    @@ -147,7 +166,7 @@ A complete list of DStream operations is available in the API documentation of [ When an output operator is called, it triggers the computation of a stream. Currently the following output operators are defined: - + @@ -176,11 +195,6 @@ When an output operator is called, it triggers the computation of a stream. Curr
 Operator | Meaning
 foreach(func) | The fundamental output operator. Applies a function, func, to each RDD generated from the stream. This function should have side effects, such as printing output, saving the RDD to external files, or writing it over the network to an external system.
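
A minimal sketch of the `foreach` operator is shown below. It assumes `wordCounts` is a `DStream[(String, Int)]`; the printing is only a placeholder for whatever side effect (writing to a database, pushing to a dashboard) the application actually needs.

{% highlight scala %}
// Minimal sketch: the function passed to foreach runs on the driver once per batch,
// while operations on the RDD itself still execute on the cluster.
wordCounts.foreach(rdd => {
  val sample = rdd.take(10)    // bring only a small sample back to the driver
  sample.foreach(println)      // placeholder side effect
})
{% endhighlight %}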
    -## DStream Persistence -Similar to RDDs, DStreams also allow developers to persist the stream's data in memory. That is, using `persist()` method on a DStream would automatically persist every RDD of that DStream in memory. This is useful if the data in the DStream will be computed multiple times (e.g., multiple DStream operations on the same data). For window-based operations like `reduceByWindow` and `reduceByKeyAndWindow` and state-based operations like `updateStateByKey`, this is implicitly true. Hence, DStreams generated by window-based operations are automatically persisted in memory, without the developer calling `persist()`. - -Note that, unlike RDDs, the default persistence level of DStreams keeps the data serialized in memory. This is further discussed in the [Performance Tuning](#memory-tuning) section. More information on different persistence levels can be found in [Spark Programming Guide](scala-programming-guide.html#rdd-persistence). - # Starting the Streaming computation All the above DStream operations are completely lazy, that is, the operations will start executing only after the context is started by using {% highlight scala %} @@ -192,8 +206,8 @@ Conversely, the computation can be stopped by using ssc.stop() {% endhighlight %} -# Example - NetworkWordCount.scala -A good example to start off is the spark.streaming.examples.NetworkWordCount. This example counts the words received from a network server every second. Given below is the relevant sections of the source code. You can find the full source code in /streaming/src/main/scala/spark/streaming/examples/WordCountNetwork.scala. +# Example +A simple example to start off is the [NetworkWordCount](https://github.com/mesos/spark/tree/master/examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala). This example counts the words received from a network server every second. Given below is the relevant sections of the source code. You can find the full source code in `/streaming/src/main/scala/spark/streaming/examples/WordCountNetwork.scala` . {% highlight scala %} import spark.streaming.{Seconds, StreamingContext} @@ -260,6 +274,31 @@ Time: 1357008430000 ms +You can find more examples in `/streaming/src/main/scala/spark/streaming/examples/`. They can be run in the similar manner using `./run spark.streaming.examples....` . Executing without any parameter would give the required parameter list. Further explanation to run them can be found in comments in the files. + +# DStream Persistence +Similar to RDDs, DStreams also allow developers to persist the stream's data in memory. That is, using `persist()` method on a DStream would automatically persist every RDD of that DStream in memory. This is useful if the data in the DStream will be computed multiple times (e.g., multiple operations on the same data). For window-based operations like `reduceByWindow` and `reduceByKeyAndWindow` and state-based operations like `updateStateByKey`, this is implicitly true. Hence, DStreams generated by window-based operations are automatically persisted in memory, without the developer calling `persist()`. + +For input streams that receive data from the network (that is, subclasses of NetworkInputDStream like FlumeInputDStream and KafkaInputDStream), the default persistence level is set to replicate the data to two nodes for fault-tolerance. + +Note that, unlike RDDs, the default persistence level of DStreams keeps the data serialized in memory. This is further discussed in the [Performance Tuning](#memory-tuning) section. 
More information on different persistence levels can be found in [Spark Programming Guide](scala-programming-guide.html#rdd-persistence). + +# RDD Checkpointing within DStreams +DStreams created by stateful operations like `updateStateByKey` require the RDDs in the DStream to be periodically saved to HDFS files for checkpointing. This is because, unless checkpointed, the lineage of operations of the state RDDs can increase indefinitely (since each RDD in the DStream depends on the previous RDD). This leads to two problems - (i) the size of Spark tasks increase proportionally with the RDD lineage leading higher task launch times, (ii) no limit on the amount of recomputation required on failure. Checkpointing RDDs at some interval by writing them to HDFS allows the lineage to be truncated. Note that checkpointing also incurs the cost of saving to HDFS which may cause the corresponding batch to take longer to process. Hence, the interval of checkpointing needs to be set carefully. At small batch sizes (say 1 second), checkpointing every batch may significantly reduce operation throughput. Conversely, checkpointing too slowly causes the lineage and task sizes to grow which may have detrimental effects. Typically, a checkpoint interval of 5 - 10 times of sliding interval of a DStream is good setting to try. + +To enable checkpointing, the developer has to provide the HDFS path to which RDD will be saved. This is done by using + +{% highlight scala %} +ssc.checkpoint(hdfsPath) // assuming ssc is the StreamingContext +{% endhighlight %} + +The interval of checkpointing of a DStream can be set by using + +{% highlight scala %} +dstream.checkpoint(checkpointInterval) // checkpointInterval must be a multiple of slide duration of dstream +{% endhighlight %} + +For DStreams that must be checkpointed (that is, DStreams created by `updateStateByKey` and `reduceByKeyAndWindow` with inverse function), the checkpoint interval of the DStream is by default set to a multiple of the DStream's sliding interval such that its at least 10 seconds. # Performance Tuning @@ -273,17 +312,21 @@ Getting the best performance of a Spark Streaming application on a cluster requi There are a number of optimizations that can be done in Spark to minimize the processing time of each batch. These have been discussed in detail in [Tuning Guide](tuning.html). This section highlights some of the most important ones. ### Level of Parallelism -Cluster resources maybe underutilized if the number of parallel tasks used in any stage of the computation is not high enough. For example, for distributed reduce operations like `reduceByKey` and `reduceByKeyAndWindow`, the default number of parallel tasks is 8. You can pass the level of parallelism as an argument (see the [`spark.PairDStreamFunctions`](api/streaming/index.html#spark.PairDStreamFunctions) documentation), or set the system property `spark.default.parallelism` to change the default. +Cluster resources maybe under-utilized if the number of parallel tasks used in any stage of the computation is not high enough. For example, for distributed reduce operations like `reduceByKey` and `reduceByKeyAndWindow`, the default number of parallel tasks is 8. You can pass the level of parallelism as an argument (see the [`spark.PairDStreamFunctions`](api/streaming/index.html#spark.PairDStreamFunctions) documentation), or set the system property `spark.default.parallelism` to change the default. 
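
For example, either of the following raises the parallelism of a shuffle-like DStream operation; the value 16 and the variable names are placeholders, and the sketch assumes `pairs` is a `DStream[(String, Int)]`.

{% highlight scala %}
// 1. Pass the number of tasks directly to the operation:
val counts = pairs.reduceByKey(_ + _, 16)            // use 16 reduce tasks for this operation

// 2. Or raise the default for all such operations (set before the context is created):
System.setProperty("spark.default.parallelism", "16")
{% endhighlight %}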
### Data Serialization The overhead of data serialization can be significant, especially when sub-second batch sizes are to be achieved. There are two aspects to it. -* Serialization of RDD data in Spark: Please refer to the detailed discussion on data serialization in the [Tuning Guide](tuning.html). However, note that unlike Spark, by default RDDs are persisted as serialized byte arrays to minimize pauses related to GC. -* Serialization of input data: To ingest external data into Spark, data received as bytes (say, from the network) needs to deserialized from bytes and re-serialized into Spark's serialization format. Hence, the deserialization overhead of input data may be a bottleneck. + +* **Serialization of RDD data in Spark**: Please refer to the detailed discussion on data serialization in the [Tuning Guide](tuning.html). However, note that unlike Spark, by default RDDs are persisted as serialized byte arrays to minimize pauses related to GC. + +* **Serialization of input data**: To ingest external data into Spark, data received as bytes (say, from the network) needs to deserialized from bytes and re-serialized into Spark's serialization format. Hence, the deserialization overhead of input data may be a bottleneck. ### Task Launching Overheads If the number of tasks launched per second is high (say, 50 or more per second), then the overhead of sending out tasks to the slaves maybe significant and will make it hard to achieve sub-second latencies. The overhead can be reduced by the following changes: -* Task Serialization: Using Kryo serialization for serializing tasks can reduced the task sizes, and therefore reduce the time taken to send them to the slaves. -* Execution mode: Running Spark in Standalone mode or coarse-grained Mesos mode leads to better task launch times than the fine-grained Mesos mode. Please refer to the [Running on Mesos guide](running-on-mesos.html) for more details. + +* **Task Serialization**: Using Kryo serialization for serializing tasks can reduced the task sizes, and therefore reduce the time taken to send them to the slaves. + +* **Execution mode**: Running Spark in Standalone mode or coarse-grained Mesos mode leads to better task launch times than the fine-grained Mesos mode. Please refer to the [Running on Mesos guide](running-on-mesos.html) for more details. These changes may reduce batch processing time by 100s of milliseconds, thus allowing sub-second batch size to be viable. ## Setting the Right Batch Size @@ -292,22 +335,121 @@ For a Spark Streaming application running on a cluster to be stable, the process A good approach to figure out the right batch size for your application is to test it with a conservative batch size (say, 5-10 seconds) and a low data rate. To verify whether the system is able to keep up with data rate, you can check the value of the end-to-end delay experienced by each processed batch (in the Spark master logs, find the line having the phrase "Total delay"). If the delay is maintained to be less than the batch size, then system is stable. Otherwise, if the delay is continuously increasing, it means that the system is unable to keep up and it therefore unstable. Once you have an idea of a stable configuration, you can try increasing the data rate and/or reducing the batch size. Note that momentary increase in the delay due to temporary data rate increases maybe fine as long as the delay reduces back to a low value (i.e., less than batch size). 
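
A sketch of such a test run is shown below; the master URL, application name and the 5 second batch duration are placeholders for a conservative starting configuration.

{% highlight scala %}
// Minimal sketch: start with a generous batch size while validating stability.
val ssc = new StreamingContext("spark://master:7077", "BatchSizeTest", Seconds(5))
// ... attach a low-rate input stream and the intended computation here ...
ssc.start()
// While this runs, look for "Total delay" in the master logs; if the reported delay stays
// below the 5 second batch duration, the configuration is keeping up with the data rate.
{% endhighlight %}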
## 24/7 Operation
-By default, Spark does not forget any of the metadata (RDDs generated, stages processed, etc.). But for a Spark Streaming application to operate 24/7, it is necessary for Spark to do periodic cleanup of it metadata. This can be enabled by setting the Java system property `spark.cleaner.delay` to the number of minutes you want any metadata to persist. For example, setting `spark.cleaner.delay` to 10 would cause Spark periodically cleanup all metadata and persisted RDDs that are older than 10 minutes. Note, that this property needs to be set before the SparkContext is created.
+By default, Spark does not forget any of the metadata (RDDs generated, stages processed, etc.). But for a Spark Streaming application to operate 24/7, it is necessary for Spark to do periodic cleanup of its metadata. This can be enabled by setting the Java system property `spark.cleaner.delay` to the number of seconds you want any metadata to persist. For example, setting `spark.cleaner.delay` to 600 would cause Spark to periodically clean up all metadata and persisted RDDs that are older than 10 minutes. Note that this property needs to be set before the SparkContext is created. This value is closely tied to any window operation that is being used. Any window operation requires the input data to be persisted in memory for at least the duration of the window. Hence, it is necessary to set the delay to at least the duration of the largest window operation used in the Spark Streaming application. If this delay is set too low, the application will throw an exception saying so.

## Memory Tuning
Tuning the memory usage and GC behavior of Spark applications has been discussed in great detail in the [Tuning Guide](tuning.html). It is recommended that you read that. In this section, we highlight a few customizations that are strongly recommended to minimize GC-related pauses in Spark Streaming applications and achieve more consistent batch processing times.
-* Default persistence level of DStreams: Unlike RDDs, the default persistence level of DStreams serializes the data in memory (that is, [StorageLevel.MEMORY_ONLY_SER](api/core/index.html#spark.storage.StorageLevel$) for DStream compared to [StorageLevel.MEMORY_ONLY](api/core/index.html#spark.storage.StorageLevel$) for RDDs). Even though keeping the data serialized incurs a higher serialization overheads, it significantly reduces GC pauses.
+* **Default persistence level of DStreams**: Unlike RDDs, the default persistence level of DStreams serializes the data in memory (that is, [StorageLevel.MEMORY_ONLY_SER](api/core/index.html#spark.storage.StorageLevel$) for DStreams compared to [StorageLevel.MEMORY_ONLY](api/core/index.html#spark.storage.StorageLevel$) for RDDs). Even though keeping the data serialized incurs higher serialization overhead, it significantly reduces GC pauses.

-* Concurrent garbage collector: Using the concurrent mark-and-sweep GC further minimizes the variability of GC pauses. Even though concurrent GC is known to reduce the overall processing throughput of the system, its use is still recommended to achieve more consistent batch processing times.
+* **Concurrent garbage collector**: Using the concurrent mark-and-sweep GC further minimizes the variability of GC pauses. Even though concurrent GC is known to reduce the overall processing throughput of the system, its use is still recommended to achieve more consistent batch processing times.
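The last two sections boil down to a couple of settings. The following is a sketch under stated assumptions (a largest window of 10 minutes, and worker JVM options supplied through `conf/spark-env.sh`), not a prescription:

{% highlight scala %}
// Sketch only: keep metadata around at least as long as the largest window used.
// Here a maximum window of 10 minutes is assumed, so the cleaner delay is 600 seconds.
// This must be set before the SparkContext (and hence the StreamingContext) is created.
System.setProperty("spark.cleaner.delay", "600")

// The concurrent mark-and-sweep collector is a JVM setting rather than a Spark property;
// it can be passed to the JVMs, e.g. via SPARK_JAVA_OPTS="-XX:+UseConcMarkSweepGC" in spark-env.sh.
{% endhighlight %}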
-# Master Fault-tolerance (Alpha)
-TODO
+# Fault-tolerance Properties
+There are two aspects to fault-tolerance - the failure of a worker node and the failure of the driver node. In this section, we are going to discuss the fault-tolerance behavior and the semantics of the processed data.
-* Checkpointing of DStream graph
+## Failure of a Worker Node
+In case of a worker node failure, none of the processed data will be lost because
-* Recovery from master faults
+1. All the input data is fault-tolerant (either the data is on HDFS, or it is replicated by Spark Streaming if received from the network)
+1. All intermediate data is expressed as RDDs with their lineage to the input data, which allows Spark to recompute any part of the intermediate data that is lost due to a worker node failure.
-* Current state and future directions
\ No newline at end of file
+If the worker node where a network data receiver is running fails, then the receiver will be restarted on a different node and it will continue to receive data. However, data that was accepted by the receiver but not yet replicated to other Spark nodes may be lost, which is a fraction of a second of data.
+
+Since all data is modeled as RDDs with their lineage of deterministic operations, any recomputation always leads to the same result. As a result, all DStream transformations are guaranteed to have _exactly-once_ semantics. That is, the final transformed result will be the same even if there was a worker node failure. However, output operations (like `foreach`) have _at-least-once_ semantics, that is, the transformed data may get written to an external entity more than once in the event of a worker failure. While this is acceptable for saving to HDFS using the `saveAs*Files` operations (as the file will simply get over-written by the same data), additional transaction-like mechanisms may be necessary to achieve exactly-once semantics for output operations.
+
+## Failure of a Driver Node
+A system that is required to operate 24/7 needs to be able to tolerate the failure of the driver node as well. Spark Streaming does this by periodically saving the state of the DStream computation to an HDFS file, which can be used to restart the streaming computation in the event of a failure of the driver node. To elaborate, the following state is periodically saved to a file.
+
+1. The DStream operator graph (input streams, output streams, etc.)
+1. The configuration of each DStream (checkpoint interval, etc.)
+1. The RDD checkpoint files of each DStream
+
+All this is periodically saved in the file `<checkpoint directory>/graph`, where `<checkpoint directory>` is the HDFS path set using `ssc.checkpoint(...)` as described earlier. To recover, a new StreamingContext can be created with this directory by using
+
+{% highlight scala %}
+val ssc = new StreamingContext(checkpointDirectory)
+{% endhighlight %}
+
+Calling `ssc.start()` on this new context will restart the receivers and the stream computations.
+
+In case of stateful operations (that is, `updateStateByKey` and `reduceByKeyAndWindow` with an inverse function), the intermediate data at the time of failure also needs to be recomputed. This requires two things - (i) the RDD checkpoints and (ii) the data received since the checkpoints. In the current _alpha_ release, the input data received from the network is not saved durably across driver failures (the data is only replicated in memory of the worker processes and is lost when the driver fails).
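To tie the pieces of this recovery story together, here is a hedged sketch of the overall pattern; the master URL, application name, batch duration, and checkpoint path are illustrative assumptions, and the two halves would of course run in separate driver processes (the original run and the restarted one).

{% highlight scala %}
// Sketch of the driver-recovery pattern described above (paths and URLs are placeholders).

// Original run: create the context, enable checkpointing, set up the streams, and start.
val checkpointDirectory = "hdfs://namenode:8020/user/spark/checkpoints/myApp"
val ssc = new StreamingContext("spark://master:7077", "MyApp", Seconds(1))
ssc.checkpoint(checkpointDirectory)
// ... define input DStreams, stateful transformations, and output operations here ...
ssc.start()

// Restarted driver (a separate process): rebuild the context from the saved state and start it.
val recoveredSsc = new StreamingContext(checkpointDirectory)
recoveredSsc.start()
{% endhighlight %}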
+Only with file input streams (where the data is already durably stored) is the recovery from driver failure complete, with all intermediate data recomputed. In a future release, this will be true for all input streams. Note that for non-stateful operations, with _all_ input streams, the system will recover and continue receiving and processing new data.
+
+To understand the behavior of the system under driver failure, let's consider what will happen with a file input stream. Specifically, the file input stream will correctly identify new files that were created while the driver was down and process them in the same way as it would have if the driver had not failed. To illustrate, let's say files are being generated every second, and a Spark Streaming program reads every new file and outputs the number of lines in the file. This is what the sequence of outputs would be with and without a driver failure.
+
+| Time | Number of lines in input file | Output without driver failure | Output with driver failure |
+|------|-------------------------------|-------------------------------|----------------------------|
+| 1    | 10  | 10  | 10  |
+| 2    | 20  | 20  | 20  |
+| 3    | 30  | 30  | 30  |
+| 4    | 40  | 40  | [DRIVER FAILS] no output |
+| 5    | 50  | 50  | no output |
+| 6    | 60  | 60  | no output |
+| 7    | 70  | 70  | [DRIVER RECOVERS] 40, 50, 60, 70 |
+| 8    | 80  | 80  | 80  |
+| 9    | 90  | 90  | 90  |
+| 10   | 100 | 100 | 100 |
    + +If the driver had crashed in the middle of the processing of time 3, then it will process time 3 and output 30 after recovery. + +# Where to Go from Here +* Documentation - [Scala and Java](api/streaming/index.html) +* More examples - [Scala](https://github.com/mesos/spark/tree/master/examples/src/main/scala/spark/streaming/examples) and [Java](https://github.com/mesos/spark/tree/master/examples/src/main/java/spark/streaming/examples) \ No newline at end of file diff --git a/streaming/src/main/scala/spark/streaming/DStream.scala b/streaming/src/main/scala/spark/streaming/DStream.scala index 84e4b5bedb..e1be5ef51c 100644 --- a/streaming/src/main/scala/spark/streaming/DStream.scala +++ b/streaming/src/main/scala/spark/streaming/DStream.scala @@ -132,7 +132,7 @@ abstract class DStream[T: ClassManifest] ( // Set the checkpoint interval to be slideDuration or 10 seconds, which ever is larger if (mustCheckpoint && checkpointDuration == null) { - checkpointDuration = slideDuration.max(Seconds(10)) + checkpointDuration = slideDuration * math.ceil(Seconds(10) / slideDuration).toInt logInfo("Checkpoint interval automatically set to " + checkpointDuration) } diff --git a/streaming/src/main/scala/spark/streaming/Duration.scala b/streaming/src/main/scala/spark/streaming/Duration.scala index e4dc579a17..ee26206e24 100644 --- a/streaming/src/main/scala/spark/streaming/Duration.scala +++ b/streaming/src/main/scala/spark/streaming/Duration.scala @@ -16,7 +16,7 @@ case class Duration (private val millis: Long) { def * (times: Int): Duration = new Duration(millis * times) - def / (that: Duration): Long = millis / that.millis + def / (that: Duration): Double = millis.toDouble / that.millis.toDouble def isMultipleOf(that: Duration): Boolean = (this.millis % that.millis == 0) diff --git a/streaming/src/main/scala/spark/streaming/Interval.scala b/streaming/src/main/scala/spark/streaming/Interval.scala index dc21dfb722..6a8b81760e 100644 --- a/streaming/src/main/scala/spark/streaming/Interval.scala +++ b/streaming/src/main/scala/spark/streaming/Interval.scala @@ -30,6 +30,7 @@ class Interval(val beginTime: Time, val endTime: Time) { override def toString = "[" + beginTime + ", " + endTime + "]" } +private[streaming] object Interval { def currentInterval(duration: Duration): Interval = { val time = new Time(System.currentTimeMillis) diff --git a/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala b/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala index 5127db3bbc..5a2dd46fa0 100644 --- a/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala +++ b/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala @@ -18,8 +18,8 @@ import org.apache.hadoop.conf.Configuration class PairDStreamFunctions[K: ClassManifest, V: ClassManifest](self: DStream[(K,V)]) extends Serializable { - - def ssc = self.ssc + + private[streaming] def ssc = self.ssc private[streaming] def defaultPartitioner(numPartitions: Int = self.ssc.sc.defaultParallelism) = { new HashPartitioner(numPartitions) @@ -242,7 +242,9 @@ extends Serializable { * Return a new DStream by applying incremental `reduceByKey` over a sliding window. * The reduced value of over a new window is calculated using the old window's reduced value : * 1. reduce the new values that entered the window (e.g., adding new counts) + * * 2. "inverse reduce" the old values that left the window (e.g., subtracting old counts) + * * This is more efficient than reduceByKeyAndWindow without "inverse reduce" function. 
* However, it is applicable to only "invertible reduce functions". * Hash partitioning is used to generate the RDDs with Spark's default number of partitions. @@ -399,7 +401,7 @@ extends Serializable { } /** - * Cogroup `this` DStream with `other` DStream. For each key k in corresponding RDDs of `this` + * Cogroup `this` DStream with `other` DStream using a partitioner. For each key k in corresponding RDDs of `this` * or `other` DStreams, the generated RDD will contains a tuple with the list of values for that * key in both RDDs. Partitioner is used to partition each generated RDD. */ diff --git a/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala index e70822e5c3..0e21b7480c 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala @@ -13,6 +13,7 @@ import twitter4j.auth.BasicAuthorization * An optional set of string filters can be used to restrict the set of tweets. The Twitter API is * such that this may return a sampled subset of all tweets during each interval. */ +private[streaming] class TwitterInputDStream( @transient ssc_ : StreamingContext, username: String, @@ -26,6 +27,7 @@ class TwitterInputDStream( } } +private[streaming] class TwitterReceiver( username: String, password: String, diff --git a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala index 5250667bcb..cac86deeaf 100644 --- a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala +++ b/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala @@ -50,7 +50,7 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { val stateStreamCheckpointInterval = Seconds(1) // this ensure checkpointing occurs at least once - val firstNumBatches = (stateStreamCheckpointInterval / batchDuration) * 2 + val firstNumBatches = (stateStreamCheckpointInterval / batchDuration).toLong * 2 val secondNumBatches = firstNumBatches // Setup the streams