709387d660
## What changes were proposed in this pull request? This PR introduces the necessary Maven modules for the new [Spark Graph](https://issues.apache.org/jira/browse/SPARK-25994) feature for Spark 3.0. * `spark-graph` is a parent module that users depend on to get all graph functionalities (Cypher and Graph Algorithms) * `spark-graph-api` defines the [Property Graph API](https://docs.google.com/document/d/1Wxzghj0PvpOVu7XD1iA8uonRYhexwn18utdcTxtkxlI) that is being shared between Cypher and Algorithms * `spark-cypher` contains a Cypher query engine implementation Both `spark-graph-api` and `spark-cypher` depend on Spark SQL. Note that the Maven module for Graph Algorithms is not part of this PR and will be introduced in https://issues.apache.org/jira/browse/SPARK-27302 A PoC for a running Cypher implementation can be found in this WIP PR https://github.com/apache/spark/pull/24297 ## How was this patch tested? Pass the Jenkins with all profiles and manually build and check the following. ``` $ ls assembly/target/scala-2.12/jars/spark-cypher* assembly/target/scala-2.12/jars/spark-cypher_2.12-3.0.0-SNAPSHOT.jar $ ls assembly/target/scala-2.12/jars/spark-graph* | grep -v graphx assembly/target/scala-2.12/jars/spark-graph-api_2.12-3.0.0-SNAPSHOT.jar assembly/target/scala-2.12/jars/spark-graph_2.12-3.0.0-SNAPSHOT.jar ``` Closes #24490 from s1ck/SPARK-27300. Lead-authored-by: Martin Junghanns <martin.junghanns@neotechnology.com> Co-authored-by: Max Kießling <max@kopfueber.org> Co-authored-by: Martin Junghanns <martin.junghanns@neo4j.com> Co-authored-by: Dongjoon Hyun <dhyun@apple.com> Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
274 lines
8.9 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
<!--
  ~ Licensed to the Apache Software Foundation (ASF) under one or more
  ~ contributor license agreements. See the NOTICE file distributed with
  ~ this work for additional information regarding copyright ownership.
  ~ The ASF licenses this file to You under the Apache License, Version 2.0
  ~ (the "License"); you may not use this file except in compliance with
  ~ the License. You may obtain a copy of the License at
  ~
  ~    http://www.apache.org/licenses/LICENSE-2.0
  ~
  ~ Unless required by applicable law or agreed to in writing, software
  ~ distributed under the License is distributed on an "AS IS" BASIS,
  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ~ See the License for the specific language governing permissions and
  ~ limitations under the License.
  -->

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-parent_2.12</artifactId>
    <version>3.0.0-SNAPSHOT</version>
    <relativePath>../pom.xml</relativePath>
  </parent>

  <artifactId>spark-assembly_2.12</artifactId>
  <name>Spark Project Assembly</name>
  <url>http://spark.apache.org/</url>
  <packaging>pom</packaging>

  <properties>
    <sbt.project.name>assembly</sbt.project.name>
    <build.testJarPhase>none</build.testJarPhase>
    <build.copyDependenciesPhase>package</build.copyDependenciesPhase>
  </properties>

  <dependencies>
    <!-- Prevent our dummy JAR from being included in Spark distributions or uploaded to YARN -->
    <dependency>
      <groupId>org.spark-project.spark</groupId>
      <artifactId>unused</artifactId>
      <version>1.0.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-mllib_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-graphx_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-graph_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-repl_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
    </dependency>

    <!--
      Because we don't shade dependencies anymore, we need to restore Guava to compile scope so
      that the libraries Spark depend on have it available. We'll package the version that Spark
      uses (14.0.1) which is not the same as Hadoop dependencies, but works.
    -->
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <scope>${hadoop.deps.scope}</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-deploy-plugin</artifactId>
        <configuration>
          <skip>true</skip>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-install-plugin</artifactId>
        <configuration>
          <skip>true</skip>
        </configuration>
      </plugin>
      <!-- zip pyspark archives to run python application on yarn mode -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-antrun-plugin</artifactId>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>run</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <target>
            <delete dir="${basedir}/../python/lib/pyspark.zip"/>
            <zip destfile="${basedir}/../python/lib/pyspark.zip">
              <fileset dir="${basedir}/../python/" includes="pyspark/**/*"/>
            </zip>
          </target>
        </configuration>
      </plugin>
    </plugins>
  </build>

  <profiles>
    <profile>
      <id>yarn</id>
      <dependencies>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-yarn_${scala.binary.version}</artifactId>
          <version>${project.version}</version>
        </dependency>
      </dependencies>
    </profile>
    <profile>
      <id>mesos</id>
      <dependencies>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-mesos_${scala.binary.version}</artifactId>
          <version>${project.version}</version>
        </dependency>
      </dependencies>
    </profile>
    <profile>
      <id>kubernetes</id>
      <dependencies>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-kubernetes_${scala.binary.version}</artifactId>
          <version>${project.version}</version>
        </dependency>
      </dependencies>
    </profile>
    <profile>
      <id>hive</id>
      <dependencies>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-hive_${scala.binary.version}</artifactId>
          <version>${project.version}</version>
        </dependency>
      </dependencies>
    </profile>
    <profile>
      <id>hive-thriftserver</id>
      <dependencies>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>
          <version>${project.version}</version>
        </dependency>
      </dependencies>
    </profile>
    <profile>
      <id>spark-ganglia-lgpl</id>
      <dependencies>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-ganglia-lgpl_${scala.binary.version}</artifactId>
          <version>${project.version}</version>
        </dependency>
      </dependencies>
    </profile>
    <profile>
      <id>bigtop-dist</id>
      <!-- This profile uses the assembly plugin to create a special "dist" package for BigTop
           that contains Spark but not the Hadoop JARs it depends on. -->
      <build>
        <plugins>
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>3.1.0</version>
            <executions>
              <execution>
                <id>dist</id>
                <phase>package</phase>
                <goals>
                  <goal>single</goal>
                </goals>
                <configuration>
                  <descriptors>
                    <descriptor>src/main/assembly/assembly.xml</descriptor>
                  </descriptors>
                </configuration>
              </execution>
            </executions>
          </plugin>
        </plugins>
      </build>
    </profile>

    <!-- Profiles that disable inclusion of certain dependencies. -->
    <profile>
      <id>hadoop-provided</id>
      <properties>
        <hadoop.deps.scope>provided</hadoop.deps.scope>
      </properties>
    </profile>
    <profile>
      <id>hive-provided</id>
      <properties>
        <hive.deps.scope>provided</hive.deps.scope>
      </properties>
    </profile>
    <profile>
      <id>orc-provided</id>
      <properties>
        <orc.deps.scope>provided</orc.deps.scope>
      </properties>
    </profile>
    <profile>
      <id>parquet-provided</id>
      <properties>
        <parquet.deps.scope>provided</parquet.deps.scope>
      </properties>
    </profile>

    <!--
      Pull in spark-hadoop-cloud and its associated JARs,
    -->
    <profile>
      <id>hadoop-cloud</id>
      <dependencies>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-hadoop-cloud_${scala.binary.version}</artifactId>
          <version>${project.version}</version>
        </dependency>
        <!--
          Redeclare this dependency to force it into the distribution.
        -->
        <dependency>
          <groupId>org.eclipse.jetty</groupId>
          <artifactId>jetty-util</artifactId>
          <scope>${hadoop.deps.scope}</scope>
        </dependency>
      </dependencies>
    </profile>
  </profiles>
</project>