48cecf673c
This change does a few things to make the hadoop-provided profile more useful: - Create new profiles for other libraries / services that might be provided by the infrastructure - Simplify and fix the poms so that the profiles are only activated while building assemblies. - Fix tests so that they're able to run when the profiles are activated - Add a new env variable to be used by distributions that use these profiles to provide the runtime classpath for Spark jobs and daemons. Author: Marcelo Vanzin <vanzin@cloudera.com> Closes #2982 from vanzin/SPARK-4048 and squashes the following commits: 82eb688 [Marcelo Vanzin] Add a comment. eb228c0 [Marcelo Vanzin] Fix borked merge. 4e38f4e [Marcelo Vanzin] Merge branch 'master' into SPARK-4048 9ef79a3 [Marcelo Vanzin] Alternative way to propagate test classpath to child processes. 371ebee [Marcelo Vanzin] Review feedback. 52f366d [Marcelo Vanzin] Merge branch 'master' into SPARK-4048 83099fc [Marcelo Vanzin] Merge branch 'master' into SPARK-4048 7377e7b [Marcelo Vanzin] Merge branch 'master' into SPARK-4048 322f882 [Marcelo Vanzin] Fix merge fail. f24e9e7 [Marcelo Vanzin] Merge branch 'master' into SPARK-4048 8b00b6a [Marcelo Vanzin] Merge branch 'master' into SPARK-4048 9640503 [Marcelo Vanzin] Cleanup child process log message. 115fde5 [Marcelo Vanzin] Simplify a comment (and make it consistent with another pom). e3ab2da [Marcelo Vanzin] Fix hive-thriftserver profile. 7820d58 [Marcelo Vanzin] Fix CliSuite with provided profiles. 1be73d4 [Marcelo Vanzin] Restore flume-provided profile. d1399ed [Marcelo Vanzin] Restore jetty dependency. 82a54b9 [Marcelo Vanzin] Remove unused profile. 5c54a25 [Marcelo Vanzin] Fix HiveThriftServer2Suite with *-provided profiles. 1fc4d0b [Marcelo Vanzin] Update dependencies for hive-thriftserver. f7b3bbe [Marcelo Vanzin] Add snappy to hadoop-provided list. 9e4e001 [Marcelo Vanzin] Remove duplicate hive profile. d928d62 [Marcelo Vanzin] Redirect child stderr to parent's log. 
4d67469 [Marcelo Vanzin] Propagate SPARK_DIST_CLASSPATH on Yarn. 417d90e [Marcelo Vanzin] Introduce "SPARK_DIST_CLASSPATH". 2f95f0d [Marcelo Vanzin] Propagate classpath to child processes during testing. 1adf91c [Marcelo Vanzin] Re-enable maven-install-plugin for a few projects. 284dda6 [Marcelo Vanzin] Rework the "hadoop-provided" profile, add new ones.
125 lines
5.4 KiB
Batchfile
125 lines
5.4 KiB
Batchfile
@echo off

rem
rem Licensed to the Apache Software Foundation (ASF) under one or more
rem contributor license agreements. See the NOTICE file distributed with
rem this work for additional information regarding copyright ownership.
rem The ASF licenses this file to You under the Apache License, Version 2.0
rem (the "License"); you may not use this file except in compliance with
rem the License. You may obtain a copy of the License at
rem
rem http://www.apache.org/licenses/LICENSE-2.0
rem
rem Unless required by applicable law or agreed to in writing, software
rem distributed under the License is distributed on an "AS IS" BASIS,
rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
rem See the License for the specific language governing permissions and
rem limitations under the License.
rem

rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
rem script and the ExecutorRunner in standalone cluster mode.

rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting
rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we
rem need to set it here because we use !datanucleus_jars! below.
rem (Calling setlocal would start a new variable scope, so the CLASSPATH changes made below
rem would be invisible to the caller once this script returns - hence the skip.)
if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion
setlocal enabledelayedexpansion
:skip_delayed_expansion
|
|
|
|
rem Scala version Spark was built against; used to locate build output directories.
set SCALA_VERSION=2.10

rem Spark home is the parent directory of the directory holding this script.
set FWDIR=%~dp0..\

rem Pick up user environment overrides from conf\spark-env.cmd, when present.
if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"

rem Seed the classpath with any user-supplied entries.
set CLASSPATH=%SPARK_CLASSPATH%;%SPARK_SUBMIT_CLASSPATH%

rem Prefer an explicitly configured conf directory; fall back to the bundled one.
if defined SPARK_CONF_DIR (
  set CLASSPATH=%CLASSPATH%;%SPARK_CONF_DIR%
) else (
  set CLASSPATH=%CLASSPATH%;%FWDIR%conf
)
|
|
|
|
rem Locate the Spark assembly jar. A release distribution (marked by the RELEASE
rem file) ships it under lib\; a development build puts it under assembly\target\.
if exist "%FWDIR%RELEASE" (
  set ASSEMBLY_SEARCH_PATH=%FWDIR%lib\spark-assembly*.jar
) else (
  set ASSEMBLY_SEARCH_PATH=%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar
)
rem The for loop expands the wildcard; if several jars match, the last one wins,
rem matching the original behavior.
for %%d in ("%ASSEMBLY_SEARCH_PATH%") do set ASSEMBLY_JAR=%%d

set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
|
|
|
|
rem When Hive support is needed, Datanucleus jars must be included on the classpath.
rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
rem built with Hive, so look for them there.
if exist "%FWDIR%RELEASE" (
set datanucleus_dir=%FWDIR%lib
) else (
set datanucleus_dir=%FWDIR%lib_managed\jars
)
rem Clear any inherited value before accumulating; the quoted "set" form keeps
rem stray trailing whitespace out of the value.
set "datanucleus_jars="
rem Delayed expansion (!var!) is required inside the loop body: %var% would be
rem expanded only once, when the for statement is parsed.
for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
set datanucleus_jars=!datanucleus_jars!;%%d
)
rem Note: datanucleus_jars starts with ";" (or is empty if no jars matched), so
rem simple concatenation here is safe.
set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
|
|
|
|
rem Collect the locally-compiled classes of every Spark module (consumed below
rem when SPARK_TESTING=1). Delayed expansion is guaranteed on at this point -
rem either enabled above or by the calling spark-class2.cmd.
set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
for %%m in (repl mllib bagel graphx streaming tools sql\catalyst sql\core sql\hive) do (
  set SPARK_CLASSES=!SPARK_CLASSES!;%FWDIR%%%m\target\scala-%SCALA_VERSION%\classes
)
|
|
|
|
rem Collect each module's test-classes directory (note: "tools" has no test
rem classes and is intentionally absent from this list).
set SPARK_TEST_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\test-classes
for %%m in (repl mllib bagel graphx streaming sql\catalyst sql\core sql\hive) do (
  set SPARK_TEST_CLASSES=!SPARK_TEST_CLASSES!;%FWDIR%%%m\target\scala-%SCALA_VERSION%\test-classes
)
|
|
|
|
if "x%SPARK_TESTING%"=="x1" (
rem Add test classes to the path - note, add SPARK_CLASSES and SPARK_TEST_CLASSES before CLASSPATH
rem so that local compilation takes precedence over the assembled jar
set CLASSPATH=%SPARK_CLASSES%;%SPARK_TEST_CLASSES%;%CLASSPATH%
)
|
|
|
|
rem Add the Hadoop config directory - otherwise FileSystem.* and friends fail to
rem find their settings. Either HADOOP_CONF_DIR or YARN_CONF_DIR is expected to
rem host the configuration files. ("if defined" is equivalent to the empty-string
rem compare: cmd deletes a variable when it is set to an empty value.)
if defined HADOOP_CONF_DIR set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR%
if defined YARN_CONF_DIR set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
|
|
|
|
rem To allow for distributions to append needed libraries to the classpath (e.g. when
rem using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
rem append it to the final classpath.
rem BUG FIX: the guard previously read %$SPARK_DIST_CLASSPATH% - the stray "$" made
rem cmd look up a variable literally named "$SPARK_DIST_CLASSPATH", which is never
rem set, so SPARK_DIST_CLASSPATH was silently ignored and the *-provided profiles
rem never worked on Windows.
if not "x%SPARK_DIST_CLASSPATH%"=="x" (
set CLASSPATH=%CLASSPATH%;%SPARK_DIST_CLASSPATH%
)
|
|
|
|
rem A bit of a hack to allow calling this script within run2.cmd without seeing
rem output: when DONT_PRINT_CLASSPATH=1 the caller reads the CLASSPATH variable
rem directly, so only echo it otherwise.
if not "%DONT_PRINT_CLASSPATH%"=="1" echo %CLASSPATH%
|