acf4bc1caa
Based on https://github.com/apache/spark/pull/5478 that provide a PYSPARK_ARCHIVES_PATH env. within this PR, we just should export PYSPARK_ARCHIVES_PATH=/user/spark/pyspark.zip,/user/spark/python/lib/py4j-0.8.2.1-src.zip in conf/spark-env.sh when we don't install PySpark on each node of Yarn. i run python application successfully on yarn-client and yarn-cluster with this PR.
andrewor14 sryza Sephiroth-Lin Can you take a look at this?thanks.
Author: Lianhui Wang <lianhuiwang09@gmail.com>
Closes #5580 from lianhuiwang/SPARK-6869 and squashes the following commits:
66ffa43 [Lianhui Wang] Update Client.scala
c2ad0f9 [Lianhui Wang] Update Client.scala
1c8f664 [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869
008850a [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869
f0b4ed8 [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869
150907b [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869
20402cd [Lianhui Wang] use ZipEntry
9d87c3f [Lianhui Wang] update scala style
e7bd971 [Lianhui Wang] address vanzin's comments
4b8a3ed [Lianhui Wang] use pyArchivesEnvOpt
e6b573b [Lianhui Wang] address vanzin's comments
f11f84a [Lianhui Wang] zip pyspark archives
5192cca [Lianhui Wang] update import path
3b1e4c8 [Lianhui Wang] address tgravescs's comments
9396346 [Lianhui Wang] put zip to make-distribution.sh
0d2baf7 [Lianhui Wang] update import paths
e0179be [Lianhui Wang] add zip pyspark archives in build or sparksubmit
31e8e06 [Lianhui Wang] update code style
9f31dac [Lianhui Wang] update code and add comments
f72987c [Lianhui Wang] add archives path to PYTHONPATH
(cherry picked from commit ebff7327af
)
Signed-off-by: Thomas Graves <tgraves@apache.org>
258 lines
9.2 KiB
XML
258 lines
9.2 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!--
|
|
~ Licensed to the Apache Software Foundation (ASF) under one or more
|
|
~ contributor license agreements. See the NOTICE file distributed with
|
|
~ this work for additional information regarding copyright ownership.
|
|
~ The ASF licenses this file to You under the Apache License, Version 2.0
|
|
~ (the "License"); you may not use this file except in compliance with
|
|
~ the License. You may obtain a copy of the License at
|
|
~
|
|
~ http://www.apache.org/licenses/LICENSE-2.0
|
|
~
|
|
~ Unless required by applicable law or agreed to in writing, software
|
|
~ distributed under the License is distributed on an "AS IS" BASIS,
|
|
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
~ See the License for the specific language governing permissions and
|
|
~ limitations under the License.
|
|
-->
|
|
|
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
<modelVersion>4.0.0</modelVersion>
|
|
<parent>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-parent_2.10</artifactId>
|
|
<version>1.4.0-SNAPSHOT</version>
|
|
<relativePath>../pom.xml</relativePath>
|
|
</parent>
|
|
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-assembly_2.10</artifactId>
|
|
<name>Spark Project Assembly</name>
|
|
<url>http://spark.apache.org/</url>
|
|
<packaging>pom</packaging>
|
|
|
|
<properties>
|
|
<sbt.project.name>assembly</sbt.project.name>
|
|
<spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
|
|
<spark.jar.basename>spark-assembly-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
|
|
<spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>
|
|
</properties>
|
|
|
|
<dependencies>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-bagel_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-mllib_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-streaming_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-graphx_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-repl_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
</dependencies>
|
|
|
|
<build>
|
|
<plugins>
|
|
<plugin>
|
|
<groupId>org.apache.maven.plugins</groupId>
|
|
<artifactId>maven-deploy-plugin</artifactId>
|
|
<configuration>
|
|
<skip>true</skip>
|
|
</configuration>
|
|
</plugin>
|
|
<plugin>
|
|
<groupId>org.apache.maven.plugins</groupId>
|
|
<artifactId>maven-install-plugin</artifactId>
|
|
<configuration>
|
|
<skip>true</skip>
|
|
</configuration>
|
|
</plugin>
|
|
<!-- zip pyspark archives to run python application on yarn mode -->
|
|
<plugin>
|
|
<groupId>org.apache.maven.plugins</groupId>
|
|
<artifactId>maven-antrun-plugin</artifactId>
|
|
<executions>
|
|
<execution>
|
|
<phase>package</phase>
|
|
<goals>
|
|
<goal>run</goal>
|
|
</goals>
|
|
</execution>
|
|
</executions>
|
|
<configuration>
|
|
<target>
|
|
<delete dir="${basedir}/../python/lib/pyspark.zip"/>
|
|
<zip destfile="${basedir}/../python/lib/pyspark.zip">
|
|
<fileset dir="${basedir}/../python/" includes="pyspark/**/*"/>
|
|
</zip>
|
|
</target>
|
|
</configuration>
|
|
</plugin>
|
|
<!-- Use the shade plugin to create a big JAR with all the dependencies -->
|
|
<plugin>
|
|
<groupId>org.apache.maven.plugins</groupId>
|
|
<artifactId>maven-shade-plugin</artifactId>
|
|
<configuration>
|
|
<shadedArtifactAttached>false</shadedArtifactAttached>
|
|
<outputFile>${spark.jar}</outputFile>
|
|
<artifactSet>
|
|
<includes>
|
|
<include>*:*</include>
|
|
</includes>
|
|
</artifactSet>
|
|
<filters>
|
|
<filter>
|
|
<artifact>*:*</artifact>
|
|
<excludes>
|
|
<exclude>org/datanucleus/**</exclude>
|
|
<exclude>META-INF/*.SF</exclude>
|
|
<exclude>META-INF/*.DSA</exclude>
|
|
<exclude>META-INF/*.RSA</exclude>
|
|
</excludes>
|
|
</filter>
|
|
</filters>
|
|
</configuration>
|
|
<executions>
|
|
<execution>
|
|
<phase>package</phase>
|
|
<goals>
|
|
<goal>shade</goal>
|
|
</goals>
|
|
<configuration>
|
|
<transformers>
|
|
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
|
|
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
|
|
<resource>META-INF/services/org.apache.hadoop.fs.FileSystem</resource>
|
|
</transformer>
|
|
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
|
|
<resource>reference.conf</resource>
|
|
</transformer>
|
|
<transformer implementation="org.apache.maven.plugins.shade.resource.DontIncludeResourceTransformer">
|
|
<resource>log4j.properties</resource>
|
|
</transformer>
|
|
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer"/>
|
|
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer"/>
|
|
</transformers>
|
|
</configuration>
|
|
</execution>
|
|
</executions>
|
|
</plugin>
|
|
</plugins>
|
|
</build>
|
|
|
|
<profiles>
|
|
<profile>
|
|
<id>yarn</id>
|
|
<dependencies>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-yarn_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
</dependencies>
|
|
</profile>
|
|
<profile>
|
|
<id>hive</id>
|
|
<dependencies>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-hive_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
</dependencies>
|
|
</profile>
|
|
<profile>
|
|
<id>hive-thriftserver</id>
|
|
<dependencies>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
</dependencies>
|
|
</profile>
|
|
<profile>
|
|
<id>spark-ganglia-lgpl</id>
|
|
<dependencies>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-ganglia-lgpl_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
</dependencies>
|
|
</profile>
|
|
<profile>
|
|
<id>bigtop-dist</id>
|
|
<!-- This profile uses the assembly plugin to create a special "dist" package for BigTop
|
|
that contains Spark but not the Hadoop JARs it depends on. -->
|
|
<build>
|
|
<plugins>
|
|
<plugin>
|
|
<groupId>org.apache.maven.plugins</groupId>
|
|
<artifactId>maven-assembly-plugin</artifactId>
|
|
<executions>
|
|
<execution>
|
|
<id>dist</id>
|
|
<phase>package</phase>
|
|
<goals>
|
|
<goal>single</goal>
|
|
</goals>
|
|
<configuration>
|
|
<descriptors>
|
|
<descriptor>src/main/assembly/assembly.xml</descriptor>
|
|
</descriptors>
|
|
</configuration>
|
|
</execution>
|
|
</executions>
|
|
</plugin>
|
|
</plugins>
|
|
</build>
|
|
</profile>
|
|
|
|
<!-- Profiles that disable inclusion of certain dependencies. -->
|
|
<profile>
|
|
<id>hadoop-provided</id>
|
|
<properties>
|
|
<hadoop.deps.scope>provided</hadoop.deps.scope>
|
|
</properties>
|
|
</profile>
|
|
<profile>
|
|
<id>hive-provided</id>
|
|
<properties>
|
|
<hive.deps.scope>provided</hive.deps.scope>
|
|
</properties>
|
|
</profile>
|
|
<profile>
|
|
<id>parquet-provided</id>
|
|
<properties>
|
|
<parquet.deps.scope>provided</parquet.deps.scope>
|
|
</properties>
|
|
</profile>
|
|
</profiles>
|
|
</project>
|