spark-instrumented-optimizer/bin/compute-classpath.cmd
Marcelo Vanzin 48cecf673c [SPARK-4048] Enhance and extend hadoop-provided profile.
This change does a few things to make the hadoop-provided profile more useful:

- Create new profiles for other libraries / services that might be provided by the infrastructure
- Simplify and fix the poms so that the profiles are only activated while building assemblies.
- Fix tests so that they're able to run when the profiles are activated
- Add a new env variable to be used by distributions that use these profiles to provide the runtime
  classpath for Spark jobs and daemons.

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #2982 from vanzin/SPARK-4048 and squashes the following commits:

82eb688 [Marcelo Vanzin] Add a comment.
eb228c0 [Marcelo Vanzin] Fix borked merge.
4e38f4e [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
9ef79a3 [Marcelo Vanzin] Alternative way to propagate test classpath to child processes.
371ebee [Marcelo Vanzin] Review feedback.
52f366d [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
83099fc [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
7377e7b [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
322f882 [Marcelo Vanzin] Fix merge fail.
f24e9e7 [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
8b00b6a [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
9640503 [Marcelo Vanzin] Cleanup child process log message.
115fde5 [Marcelo Vanzin] Simplify a comment (and make it consistent with another pom).
e3ab2da [Marcelo Vanzin] Fix hive-thriftserver profile.
7820d58 [Marcelo Vanzin] Fix CliSuite with provided profiles.
1be73d4 [Marcelo Vanzin] Restore flume-provided profile.
d1399ed [Marcelo Vanzin] Restore jetty dependency.
82a54b9 [Marcelo Vanzin] Remove unused profile.
5c54a25 [Marcelo Vanzin] Fix HiveThriftServer2Suite with *-provided profiles.
1fc4d0b [Marcelo Vanzin] Update dependencies for hive-thriftserver.
f7b3bbe [Marcelo Vanzin] Add snappy to hadoop-provided list.
9e4e001 [Marcelo Vanzin] Remove duplicate hive profile.
d928d62 [Marcelo Vanzin] Redirect child stderr to parent's log.
4d67469 [Marcelo Vanzin] Propagate SPARK_DIST_CLASSPATH on Yarn.
417d90e [Marcelo Vanzin] Introduce "SPARK_DIST_CLASSPATH".
2f95f0d [Marcelo Vanzin] Propagate classpath to child processes during testing.
1adf91c [Marcelo Vanzin] Re-enable maven-install-plugin for a few projects.
284dda6 [Marcelo Vanzin] Rework the "hadoop-provided" profile, add new ones.
2015-01-08 17:15:13 -08:00

125 lines
5.4 KiB
Batchfile

@echo off
rem
rem Licensed to the Apache Software Foundation (ASF) under one or more
rem contributor license agreements. See the NOTICE file distributed with
rem this work for additional information regarding copyright ownership.
rem The ASF licenses this file to You under the Apache License, Version 2.0
rem (the "License"); you may not use this file except in compliance with
rem the License. You may obtain a copy of the License at
rem
rem http://www.apache.org/licenses/LICENSE-2.0
rem
rem Unless required by applicable law or agreed to in writing, software
rem distributed under the License is distributed on an "AS IS" BASIS,
rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
rem See the License for the specific language governing permissions and
rem limitations under the License.
rem
rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
rem script and the ExecutorRunner in standalone cluster mode.
rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting
rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we
rem need to set it here because we use !datanucleus_jars! below.
if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion
setlocal enabledelayedexpansion
:skip_delayed_expansion
set SCALA_VERSION=2.10
rem Figure out where the Spark framework is installed
set FWDIR=%~dp0..\
rem Load environment variables from conf\spark-env.cmd, if it exists
if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
rem Build up classpath
set CLASSPATH=%SPARK_CLASSPATH%;%SPARK_SUBMIT_CLASSPATH%
if not "x%SPARK_CONF_DIR%"=="x" (
set CLASSPATH=%CLASSPATH%;%SPARK_CONF_DIR%
) else (
set CLASSPATH=%CLASSPATH%;%FWDIR%conf
)
if exist "%FWDIR%RELEASE" (
for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
set ASSEMBLY_JAR=%%d
)
) else (
for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
set ASSEMBLY_JAR=%%d
)
)
set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
rem When Hive support is needed, Datanucleus jars must be included on the classpath.
rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
rem built with Hive, so look for them there.
if exist "%FWDIR%RELEASE" (
set datanucleus_dir=%FWDIR%lib
) else (
set datanucleus_dir=%FWDIR%lib_managed\jars
)
set "datanucleus_jars="
for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
set datanucleus_jars=!datanucleus_jars!;%%d
)
set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%graphx\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%tools\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\catalyst\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\core\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\hive\target\scala-%SCALA_VERSION%\classes
set SPARK_TEST_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\test-classes
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\test-classes
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\test-classes
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\test-classes
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%graphx\target\scala-%SCALA_VERSION%\test-classes
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\test-classes
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\catalyst\target\scala-%SCALA_VERSION%\test-classes
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\core\target\scala-%SCALA_VERSION%\test-classes
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\hive\target\scala-%SCALA_VERSION%\test-classes
if "x%SPARK_TESTING%"=="x1" (
rem Add test clases to path - note, add SPARK_CLASSES and SPARK_TEST_CLASSES before CLASSPATH
rem so that local compilation takes precedence over assembled jar
set CLASSPATH=%SPARK_CLASSES%;%SPARK_TEST_CLASSES%;%CLASSPATH%
)
rem Add hadoop conf dir - else FileSystem.*, etc fail
rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
rem the configurtion files.
if "x%HADOOP_CONF_DIR%"=="x" goto no_hadoop_conf_dir
set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR%
:no_hadoop_conf_dir
if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
:no_yarn_conf_dir
rem To allow for distributions to append needed libraries to the classpath (e.g. when
rem using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
rem append it to tbe final classpath.
if not "x%$SPARK_DIST_CLASSPATH%"=="x" (
set CLASSPATH=%CLASSPATH%;%SPARK_DIST_CLASSPATH%
)
rem A bit of a hack to allow calling this script within run2.cmd without seeing output
if "%DONT_PRINT_CLASSPATH%"=="1" goto exit
echo %CLASSPATH%
:exit