More work on HTTP class loading

Matei Zaharia 2010-09-28 22:32:38 -07:00
parent 7ef3a20a0c
commit e068f21e01
5 changed files with 61 additions and 24 deletions

Makefile

@@ -10,6 +10,8 @@ JARS += third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar
 JARS += third_party/hadoop-0.20.0/lib/commons-logging-1.0.4.jar
 JARS += third_party/scalatest-1.2/scalatest-1.2.jar
 JARS += third_party/scalacheck_2.8.0-1.7.jar
+JARS += third_party/jetty-7.1.6.v20100715/jetty-server-7.1.6.v20100715.jar
+JARS += third_party/jetty-7.1.6.v20100715/servlet-api-2.5.jar
 CLASSPATH = $(subst $(SPACE),:,$(JARS))
 SCALA_SOURCES = src/examples/*.scala src/scala/spark/*.scala src/scala/spark/repl/*.scala
 

run

@@ -36,6 +36,8 @@ SPARK_CLASSPATH+=:$FWDIR/third_party/guava-r06/guava-r06.jar
 SPARK_CLASSPATH+=:$FWDIR/third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar
 SPARK_CLASSPATH+=:third_party/scalatest-1.2/scalatest-1.2.jar
 SPARK_CLASSPATH+=:third_party/scalacheck_2.8.0-1.7.jar
+SPARK_CLASSPATH+=:third_party/jetty-7.1.6.v20100715/jetty-server-7.1.6.v20100715.jar
+SPARK_CLASSPATH+=:third_party/jetty-7.1.6.v20100715/servlet-api-2.5.jar
 for jar in $FWDIR/third_party/hadoop-0.20.0/lib/*.jar; do
   SPARK_CLASSPATH+=:$jar
 done

src/scala/spark/Executor.scala

@@ -25,10 +25,10 @@ object Executor {
     // If the REPL is in use, create a ClassLoader that will be able to
     // read new classes defined by the REPL as the user types code
     classLoader = this.getClass.getClassLoader
-    val classDir = System.getProperty("spark.repl.current.classdir")
-    if (classDir != null) {
-      println("Using REPL classdir: " + classDir)
-      classLoader = new repl.ExecutorClassLoader(classDir, classLoader)
+    val classUri = System.getProperty("spark.repl.class.uri")
+    if (classUri != null) {
+      println("Using REPL class URI: " + classUri)
+      classLoader = new repl.ExecutorClassLoader(classUri, classLoader)
     }
     Thread.currentThread.setContextClassLoader(classLoader)
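
Taken together with the SparkInterpreter change further down, this sets up a simple handshake: the interpreter publishes its class server's address in a system property, and any executor that sees the property wraps its class loader. A minimal sketch of the two sides (the URI value below is a made-up example; at runtime it comes from classServer.uri):

// Driver side (see the SparkInterpreter hunk below): publish the address.
System.setProperty("spark.repl.class.uri", "http://10.0.0.5:49213") // example value

// Executor side (as in the hunk above): consume the property if present.
val classUri = System.getProperty("spark.repl.class.uri")
if (classUri != null)
  Thread.currentThread.setContextClassLoader(
    new spark.repl.ExecutorClassLoader(classUri, getClass.getClassLoader))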

src/scala/spark/repl/ExecutorClassLoader.scala

@@ -1,7 +1,7 @@
 package spark.repl
 
 import java.io.{ByteArrayOutputStream, InputStream}
-import java.net.{URI, URL, URLClassLoader}
+import java.net.{URI, URL, URLClassLoader, URLEncoder}
 import java.util.concurrent.{Executors, ExecutorService}
 
 import org.apache.hadoop.conf.Configuration
@@ -12,18 +12,35 @@ import org.objectweb.asm.commons.EmptyVisitor
 import org.objectweb.asm.Opcodes._
 
-// A ClassLoader that reads classes from a Hadoop FileSystem URL, used to load
-// classes defined by the interpreter when the REPL is in use
-class ExecutorClassLoader(classDir: String, parent: ClassLoader)
+/**
+ * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI,
+ * used to load classes defined by the interpreter when the REPL is used
+ */
+class ExecutorClassLoader(classUri: String, parent: ClassLoader)
 extends ClassLoader(parent) {
-  val fileSystem = FileSystem.get(new URI(classDir), new Configuration())
-  val directory = new URI(classDir).getPath
+  val uri = new URI(classUri)
+  val directory = uri.getPath
+
+  // Hadoop FileSystem object for our URI, if it isn't using HTTP
+  var fileSystem: FileSystem = {
+    if (uri.getScheme() == "http")
+      null
+    else
+      FileSystem.get(uri, new Configuration())
+  }
 
   override def findClass(name: String): Class[_] = {
     try {
       //println("repl.ExecutorClassLoader resolving " + name)
-      val path = new Path(directory, name.replace('.', '/') + ".class")
-      val bytes = readAndTransformClass(name, fileSystem.open(path))
+      val pathInDirectory = name.replace('.', '/') + ".class"
+      val inputStream = {
+        if (fileSystem != null)
+          fileSystem.open(new Path(directory, pathInDirectory))
+        else
+          new URL(classUri + "/" + urlEncode(pathInDirectory)).openStream()
+      }
+      val bytes = readAndTransformClass(name, inputStream)
+      inputStream.close()
       return defineClass(name, bytes, 0, bytes.length)
     } catch {
       case e: Exception => throw new ClassNotFoundException(name, e)
@@ -57,6 +74,13 @@ extends ClassLoader(parent) {
       return bos.toByteArray
     }
   }
+
+  /**
+   * URL-encode a string, preserving only slashes
+   */
+  def urlEncode(str: String): String = {
+    str.split('/').map(part => URLEncoder.encode(part, "UTF-8")).mkString("/")
+  }
 }
 
 class ConstructorCleaner(className: String, cv: ClassVisitor)
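
A note on the new urlEncode helper: calling URLEncoder.encode on the whole path would also escape the '/' separators (as "%2F"), which would break the path on the server side, so the helper encodes each segment independently. Shown standalone, with an invented input:

import java.net.URLEncoder

// Same logic as the urlEncode method above: percent-encode each path
// segment on its own, keeping the '/' separators between segments intact.
def urlEncode(str: String): String =
  str.split('/').map(part => URLEncoder.encode(part, "UTF-8")).mkString("/")

urlEncode("my/pkg/My$Class.class") // => "my/pkg/My%24Class.class"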

src/scala/spark/repl/SparkInterpreter.scala

@@ -90,32 +90,40 @@ class SparkInterpreter(val settings: Settings, out: PrintWriter)
   val SPARK_DEBUG_REPL: Boolean = (System.getenv("SPARK_DEBUG_REPL") == "1")
 
-  /** directory to save .class files to */
-  //val virtualDirectory = new VirtualDirectory("(memory)", None)
-  val virtualDirectory = {
+  val outputDir = {
     val rootDir = new File(System.getProperty("spark.repl.classdir",
                            System.getProperty("java.io.tmpdir")))
     var attempts = 0
     val maxAttempts = 10
-    var outputDir: File = null
-    while (outputDir == null) {
+    var dir: File = null
+    while (dir == null) {
       attempts += 1
       if (attempts > maxAttempts) {
         throw new IOException("Failed to create a temp directory " +
                               "after " + maxAttempts + " attempts!")
       }
       try {
-        outputDir = new File(rootDir, "spark-" + UUID.randomUUID.toString)
-        if (outputDir.exists() || !outputDir.mkdirs())
-          outputDir = null
+        dir = new File(rootDir, "spark-" + UUID.randomUUID.toString)
+        if (dir.exists() || !dir.mkdirs())
+          dir = null
       } catch { case e: IOException => ; }
     }
-    System.setProperty("spark.repl.current.classdir",
-      "file://" + outputDir.getAbsolutePath + "/")
     if (SPARK_DEBUG_REPL)
-      println("Output directory: " + outputDir)
-    new PlainFile(outputDir)
+      println("Output directory: " + dir)
+    dir
   }
+
+  /** directory to save .class files to */
+  //val virtualDirectory = new VirtualDirectory("(memory)", None)
+  val virtualDirectory = new PlainFile(outputDir)
+
+  /** Jetty server that will serve our classes to worker nodes */
+  val classServer = new ClassServer(outputDir)
+
+  // Start the classServer and remember its URI in a spark system property
+  classServer.start()
+  println("ClassServer started, URI = " + classServer.uri)
+  System.setProperty("spark.repl.class.uri", classServer.uri)
 
   /** reporter */
   object reporter extends ConsoleReporter(settings, null, out) {
@@ -714,6 +722,7 @@ class SparkInterpreter(val settings: Settings, out: PrintWriter)
    */
   def close() {
     reporter.flush
+    classServer.stop()
   }
 
   /** A traverser that finds all mentioned identifiers, i.e. things
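
The ClassServer used above is not among the five files changed here, so only its interface (start(), stop(), uri) is visible in this diff. For orientation, a rough sketch of what a Jetty-7-based server of that shape could look like; the class name ClassServerSketch, the ResourceHandler approach, and the address construction are assumptions for illustration, not the actual implementation:

import java.io.File
import java.net.InetAddress
import org.eclipse.jetty.server.Server
import org.eclipse.jetty.server.handler.ResourceHandler

class ClassServerSketch(classDir: File) {
  private val server = new Server(0) // port 0: let the OS pick a free port

  def start() {
    val handler = new ResourceHandler
    handler.setResourceBase(classDir.getAbsolutePath) // serve .class files from here
    server.setHandler(handler)
    server.start()
  }

  // Address that workers should fetch classes from; valid only after start().
  def uri: String = {
    val port = server.getConnectors()(0).getLocalPort
    "http://" + InetAddress.getLocalHost.getHostAddress + ":" + port
  }

  def stop() { server.stop() }
}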