Run script fixes for Windows after package & assembly change

2013-09-01 23:45:57 +00:00 · 2013-09-01 23:45:57 +00:00 · 3db404a43a
parent f957c26fa2
commit 3db404a43a
10 changed files with 148 additions and 83 deletions
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@ -28,30 +28,27 @@ set FWDIR=%~dp0..\
 rem Load environment variables from conf\spark-env.cmd, if it exists
 if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"

-set CORE_DIR=%FWDIR%core
-set REPL_DIR=%FWDIR%repl
-set EXAMPLES_DIR=%FWDIR%examples
-set BAGEL_DIR=%FWDIR%bagel
-set MLLIB_DIR=%FWDIR%mllib
-set TOOLS_DIR=%FWDIR%tools
-set YARN_DIR=%FWDIR%yarn
-set STREAMING_DIR=%FWDIR%streaming
-set PYSPARK_DIR=%FWDIR%python
-
 rem Build up classpath
-set CLASSPATH=%SPARK_CLASSPATH%;%MESOS_CLASSPATH%;%FWDIR%conf;%CORE_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%CORE_DIR%\target\scala-%SCALA_VERSION%\test-classes;%CORE_DIR%\src\main\resources
-set CLASSPATH=%CLASSPATH%;%STREAMING_DIR%\target\scala-%SCALA_VERSION%\classes;%STREAMING_DIR%\target\scala-%SCALA_VERSION%\test-classes
-set CLASSPATH=%CLASSPATH%;%STREAMING_DIR%\lib\org\apache\kafka\kafka\0.7.2-spark\*
-set CLASSPATH=%CLASSPATH%;%REPL_DIR%\target\scala-%SCALA_VERSION%\classes;%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\jars\*
-set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\bundles\*
-set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\*
-set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\*
-set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%MLLIB_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%TOOLS_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%YARN_DIR%\target\scala-%SCALA_VERSION%\classes
+set CLASSPATH=%SPARK_CLASSPATH%;%FWDIR%conf
+if exist "%FWDIR%RELEASE" (
+  for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
+    set ASSEMBLY_JAR=%%d
+  )
+) else (
+  for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
+    set ASSEMBLY_JAR=%%d
+  )
+)
+set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
+
+rem Add test classes to path. Each entry is guarded by its own single-line "if": inside a
+rem parenthesized block the CLASSPATH reference is expanded once, when the block is parsed,
+rem so chained "set" lines would all start from the old value and only the last would stick.
+if "x%SPARK_TESTING%"=="x1" set CLASSPATH=%CLASSPATH%;%FWDIR%core\target\scala-%SCALA_VERSION%\test-classes
+if "x%SPARK_TESTING%"=="x1" set CLASSPATH=%CLASSPATH%;%FWDIR%repl\target\scala-%SCALA_VERSION%\test-classes
+if "x%SPARK_TESTING%"=="x1" set CLASSPATH=%CLASSPATH%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\test-classes
+if "x%SPARK_TESTING%"=="x1" set CLASSPATH=%CLASSPATH%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\test-classes
+if "x%SPARK_TESTING%"=="x1" set CLASSPATH=%CLASSPATH%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\test-classes

 rem Add hadoop conf dir - else FileSystem.*, etc fail
 rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
@ -64,9 +61,6 @@ if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
  set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
 :no_yarn_conf_dir

-rem Add Scala standard library
-set CLASSPATH=%CLASSPATH%;%SCALA_HOME%\lib\scala-library.jar;%SCALA_HOME%\lib\scala-compiler.jar;%SCALA_HOME%\lib\jline.jar
-
 rem A bit of a hack to allow calling this script within run2.cmd without seeing output
 if "%DONT_PRINT_CLASSPATH%"=="1" goto exit

--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@ -631,20 +631,26 @@ class SparkContext(
   * filesystems), or an HTTP, HTTPS or FTP URI.
   */
  def addJar(path: String) {
-    if (null == path) {
+    if (path == null) {
      logWarning("null specified as parameter to addJar",
        new SparkException("null specified as parameter to addJar"))
    } else {
-      val env = SparkEnv.get
-      val uri = new URI(path)
-      val key = uri.getScheme match {
-        case null | "file" =>
-          if (env.hadoop.isYarnMode()) {
-            logWarning("local jar specified as parameter to addJar under Yarn mode")
-            return
-          }
-          env.httpFileServer.addJar(new File(uri.getPath))
-        case _ => path
+      var key = ""
+      if (path.contains("\\")) {
+        // For local paths with backslashes on Windows, URI throws an exception
+        key = env.httpFileServer.addJar(new File(path))
+      } else {
+        val uri = new URI(path)
+        key = uri.getScheme match {
+          case null | "file" =>
+            if (env.hadoop.isYarnMode()) {
+              logWarning("local jar specified as parameter to addJar under Yarn mode")
+              return
+            }
+            env.httpFileServer.addJar(new File(uri.getPath))
+          case _ =>
+            path
+        }
      }
      addedJars(key) = System.currentTimeMillis
      logInfo("Added JAR " + path + " at " + key + " with timestamp " + addedJars(key))
--- a/docs/spark-standalone.md
+++ b/docs/spark-standalone.md
@ -9,7 +9,7 @@ In addition to running on the Mesos or YARN cluster managers, Spark also provide

 You can start a standalone master server by executing:

-    ./spark-class spark.deploy.master.Master
+    ./spark-class org.apache.spark.deploy.master.Master

 Once started, the master will print out a `spark://HOST:PORT` URL for itself, which you can use to connect workers to it,
 or pass as the "master" argument to `SparkContext`. You can also find this URL on
@ -17,7 +17,7 @@ the master's web UI, which is [http://localhost:8080](http://localhost:8080) by

 Similarly, you can start one or more workers and connect them to the master via:

-    ./spark-class spark.deploy.worker.Worker spark://IP:PORT
+    ./spark-class org.apache.spark.deploy.worker.Worker spark://IP:PORT

 Once you have started a worker, look at the master's web UI ([http://localhost:8080](http://localhost:8080) by default).
 You should see the new node listed there, along with its number of CPUs and memory (minus one gigabyte left for the OS).
--- a/run-example.cmd
+++ b/run-example.cmd
@ -0,0 +1,23 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This is the entry point for running a Spark example. To avoid polluting the
+rem environment, it just launches a new cmd to do the real work. The script path is
+rem quoted so install directories with spaces work; cmd /C strips the outer quote pair.
+cmd /V /E /C ""%~dp0run-example2.cmd" %*"
--- a/run-example2.cmd
+++ b/run-example2.cmd
@ -0,0 +1,61 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+set SCALA_VERSION=2.9.3
+
+rem Figure out where the Spark framework is installed (the directory holding this script)
+set FWDIR=%~dp0
+
+rem Export this as SPARK_HOME
+set SPARK_HOME=%FWDIR%
+
+rem Load environment variables from conf\spark-env.cmd, if it exists
+if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
+
+rem Test that an argument was given; if not, print the usage message and stop
+if not "x%1"=="x" goto arg_given
+  echo Usage: run-example ^<example-class^> [^<args^>]
+  goto exit
+:arg_given
+
+set EXAMPLES_DIR=%FWDIR%examples
+
+rem Figure out the JAR file that our examples were packaged into (the last match wins).
+set SPARK_EXAMPLES_JAR=
+for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*assembly*.jar") do (
+  set SPARK_EXAMPLES_JAR=%%d
+)
+if "x%SPARK_EXAMPLES_JAR%"=="x" (
+  echo Failed to find Spark examples assembly JAR.
+  echo You need to build Spark with sbt\sbt assembly before running this program.
+  goto exit
+)
+
+rem Compute Spark classpath using external script, then put the examples JAR in front of it
+set DONT_PRINT_CLASSPATH=1
+call "%FWDIR%bin\compute-classpath.cmd"
+set DONT_PRINT_CLASSPATH=0
+set CLASSPATH=%SPARK_EXAMPLES_JAR%;%CLASSPATH%
+
+rem Figure out where java is: JAVA_HOME\bin\java when JAVA_HOME is set, otherwise plain java
+set RUNNER=java
+if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
+
+"%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
+:exit
--- a/sbt/sbt.cmd
+++ b/sbt/sbt.cmd
@ -22,4 +22,4 @@ if not "%MESOS_HOME%x"=="x" set EXTRA_ARGS=-Djava.library.path=%MESOS_HOME%\lib\

 set SPARK_HOME=%~dp0..

-java -Xmx1200M -XX:MaxPermSize=200m %EXTRA_ARGS% -jar %SPARK_HOME%\sbt\sbt-launch-0.11.3-2.jar "%*"
+java -Xmx1200M -XX:MaxPermSize=200m -XX:ReservedCodeCacheSize=256m %EXTRA_ARGS% -jar %SPARK_HOME%\sbt\sbt-launch-0.11.3-2.jar "%*"
--- a/2
+++ b/2
@ -31,7 +31,7 @@ if [ -e $FWDIR/conf/spark-env.sh ] ; then
 fi

 if [ -z "$1" ]; then
-  echo "Usage: run <spark-class> [<args>]" >&2
+  echo "Usage: spark-class <class> [<args>]" >&2
  exit 1
 fi

--- a/spark-class.cmd
+++ b/spark-class.cmd
@ -17,4 +17,7 @@ rem See the License for the specific language governing permissions and
 rem limitations under the License.
 rem

-cmd /V /E /C %~dp0run2.cmd %*
+rem This is the entry point for running a Spark class. To avoid polluting the
+rem environment, it just launches a new cmd to do the real work. The script path is
+rem quoted so install directories with spaces work; cmd /C strips the outer quote pair.
+cmd /V /E /C ""%~dp0spark-class2.cmd" %*"
--- a/spark-class2.cmd
+++ b/spark-class2.cmd
@ -30,7 +30,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"

 rem Test that an argument was given
 if not "x%1"=="x" goto arg_given
-  echo Usage: run ^<spark-class^> [^<args^>]
+  echo Usage: spark-class ^<class^> [^<args^>]
  goto exit
 :arg_given

@ -44,12 +44,6 @@ rem Do not overwrite SPARK_JAVA_OPTS environment variable in this script
 if "%RUNNING_DAEMON%"=="0" set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS%
 if "%RUNNING_DAEMON%"=="1" set OUR_JAVA_OPTS=%SPARK_DAEMON_JAVA_OPTS%

-rem Check that SCALA_HOME has been specified
-if not "x%SCALA_HOME%"=="x" goto scala_exists
-  echo SCALA_HOME is not set
-  goto exit
-:scala_exists
-
 rem Figure out how much memory to use per executor and set it as an environment
 rem variable so that our process sees it and can report it to Mesos
 if "x%SPARK_MEM%"=="x" set SPARK_MEM=512m
@ -58,43 +52,27 @@ rem Set JAVA_OPTS to be able to load native libraries and to set heap size
 set JAVA_OPTS=%OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%SPARK_MEM% -Xmx%SPARK_MEM%
 rem Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala!

-set CORE_DIR=%FWDIR%core
-set EXAMPLES_DIR=%FWDIR%examples
-set REPL_DIR=%FWDIR%repl
+rem Test whether the user has built Spark
+if exist "%FWDIR%RELEASE" goto skip_build_test
+set FOUND_JAR=0
+for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
+  set FOUND_JAR=1
+)
+if "%FOUND_JAR%"=="0" (
+  echo Failed to find Spark assembly JAR.
+  echo You need to build Spark with sbt\sbt assembly before running this program.
+  goto exit
+)
+:skip_build_test

 rem Compute classpath using external script
 set DONT_PRINT_CLASSPATH=1
 call "%FWDIR%bin\compute-classpath.cmd"
 set DONT_PRINT_CLASSPATH=0

-rem Figure out the JAR file that our examples were packaged into.
-rem First search in the build path from SBT:
-for %%d in ("examples/target/scala-%SCALA_VERSION%/spark-examples*.jar") do (
-  set SPARK_EXAMPLES_JAR=examples/target/scala-%SCALA_VERSION%/%%d
-)
-rem Then search in the build path from Maven:
-for %%d in ("examples/target/spark-examples*hadoop*.jar") do (
-  set SPARK_EXAMPLES_JAR=examples/target/%%d
-)
+rem Figure out where java is.
+set RUNNER=java
+if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java

-rem Figure out whether to run our class with java or with the scala launcher.
-rem In most cases, we'd prefer to execute our process with java because scala
-rem creates a shell script as the parent of its Java process, which makes it
-rem hard to kill the child with stuff like Process.destroy(). However, for
-rem the Spark shell, the wrapper is necessary to properly reset the terminal
-rem when we exit, so we allow it to set a variable to launch with scala.
-if "%SPARK_LAUNCH_WITH_SCALA%" NEQ 1 goto java_runner
-  set RUNNER=%SCALA_HOME%\bin\scala
-  # Java options will be passed to scala as JAVA_OPTS
-  set EXTRA_ARGS=
-  goto run_spark
-:java_runner
-  set CLASSPATH=%CLASSPATH%;%SCALA_HOME%\lib\scala-library.jar;%SCALA_HOME%\lib\scala-compiler.jar;%SCALA_HOME%\lib\jline.jar
-  set RUNNER=java
-  if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
-  rem The JVM doesn't read JAVA_OPTS by default so we need to pass it in
-  set EXTRA_ARGS=%JAVA_OPTS%
-:run_spark
-
-"%RUNNER%" -cp "%CLASSPATH%" %EXTRA_ARGS% %*
+"%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
 :exit
--- a/spark-shell.cmd
+++ b/spark-shell.cmd
@ -18,5 +18,5 @@ rem limitations under the License.
 rem

 set FWDIR=%~dp0
-set SPARK_LAUNCH_WITH_SCALA=1
-cmd /V /E /C %FWDIR%run2.cmd org.apache.spark.repl.Main %*
+rem Quote the script path so install directories with spaces work; cmd /C strips the outer quotes
+cmd /V /E /C ""%FWDIR%spark-class2.cmd" org.apache.spark.repl.Main %*"