2013-12-31 22:30:08 -05:00
|
|
|
<?xml version="1.0" encoding="UTF-8"?>
|
|
|
|
<!--
|
|
|
|
~ Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
~ contributor license agreements. See the NOTICE file distributed with
|
|
|
|
~ this work for additional information regarding copyright ownership.
|
|
|
|
~ The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
~ (the "License"); you may not use this file except in compliance with
|
|
|
|
~ the License. You may obtain a copy of the License at
|
|
|
|
~
|
|
|
|
~ http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
~
|
|
|
|
~ Unless required by applicable law or agreed to in writing, software
|
|
|
|
~ distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
~ See the License for the specific language governing permissions and
|
|
|
|
~ limitations under the License.
|
|
|
|
-->
|
|
|
|
|
|
|
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
2014-03-08 02:10:35 -05:00
|
|
|
<modelVersion>4.0.0</modelVersion>
|
|
|
|
<parent>
|
2013-12-31 22:30:08 -05:00
|
|
|
<groupId>org.apache.spark</groupId>
|
2014-03-08 02:10:35 -05:00
|
|
|
<artifactId>spark-parent</artifactId>
|
2014-06-05 14:27:33 -04:00
|
|
|
<version>1.1.0-SNAPSHOT</version>
|
2014-03-08 02:10:35 -05:00
|
|
|
<relativePath>../pom.xml</relativePath>
|
|
|
|
</parent>
|
2013-12-31 22:30:08 -05:00
|
|
|
|
2014-03-08 02:10:35 -05:00
|
|
|
<groupId>org.apache.spark</groupId>
|
|
|
|
<artifactId>spark-core_2.10</artifactId>
|
|
|
|
<packaging>jar</packaging>
|
|
|
|
<name>Spark Project Core</name>
|
|
|
|
<url>http://spark.apache.org/</url>
|
|
|
|
<dependencies>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.apache.hadoop</groupId>
|
|
|
|
<artifactId>hadoop-client</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>net.java.dev.jets3t</groupId>
|
|
|
|
<artifactId>jets3t</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.apache.curator</groupId>
|
|
|
|
<artifactId>curator-recipes</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.eclipse.jetty</groupId>
|
|
|
|
<artifactId>jetty-plus</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.eclipse.jetty</groupId>
|
|
|
|
<artifactId>jetty-security</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.eclipse.jetty</groupId>
|
|
|
|
<artifactId>jetty-util</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.eclipse.jetty</groupId>
|
|
|
|
<artifactId>jetty-server</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.google.guava</groupId>
|
|
|
|
<artifactId>guava</artifactId>
|
|
|
|
</dependency>
|
2014-05-04 20:43:28 -04:00
|
|
|
<dependency>
|
|
|
|
<groupId>org.apache.commons</groupId>
|
|
|
|
<artifactId>commons-lang3</artifactId>
|
|
|
|
</dependency>
|
2014-06-12 22:44:27 -04:00
|
|
|
<!-- commons-math3 is declared with test scope, so it is only on the test classpath. Unlike most dependencies in this file, its version is pinned here explicitly. -->
<dependency>
|
|
|
|
<groupId>org.apache.commons</groupId>
|
|
|
|
<artifactId>commons-math3</artifactId>
|
2014-06-13 05:59:38 -04:00
|
|
|
<version>3.3</version>
|
2014-06-12 22:44:27 -04:00
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
2014-04-03 20:00:06 -04:00
|
|
|
<dependency>
|
|
|
|
<groupId>com.google.code.findbugs</groupId>
|
|
|
|
<artifactId>jsr305</artifactId>
|
|
|
|
</dependency>
|
2014-03-08 02:10:35 -05:00
|
|
|
<dependency>
|
|
|
|
<groupId>org.slf4j</groupId>
|
|
|
|
<artifactId>slf4j-api</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.slf4j</groupId>
|
|
|
|
<artifactId>jul-to-slf4j</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.slf4j</groupId>
|
|
|
|
<artifactId>jcl-over-slf4j</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>log4j</groupId>
|
|
|
|
<artifactId>log4j</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.slf4j</groupId>
|
|
|
|
<artifactId>slf4j-log4j12</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.ning</groupId>
|
|
|
|
<artifactId>compress-lzf</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.xerial.snappy</groupId>
|
|
|
|
<artifactId>snappy-java</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.twitter</groupId>
|
|
|
|
<artifactId>chill_${scala.binary.version}</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.twitter</groupId>
|
|
|
|
<artifactId>chill-java</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>commons-net</groupId>
|
|
|
|
<artifactId>commons-net</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>${akka.group}</groupId>
|
|
|
|
<artifactId>akka-remote_${scala.binary.version}</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>${akka.group}</groupId>
|
|
|
|
<artifactId>akka-slf4j_${scala.binary.version}</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>${akka.group}</groupId>
|
|
|
|
<artifactId>akka-testkit_${scala.binary.version}</artifactId>
|
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.scala-lang</groupId>
|
|
|
|
<artifactId>scala-library</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.json4s</groupId>
|
|
|
|
<artifactId>json4s-jackson_${scala.binary.version}</artifactId>
|
|
|
|
<version>3.2.6</version>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>colt</groupId>
|
|
|
|
<artifactId>colt</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.apache.mesos</groupId>
|
|
|
|
<artifactId>mesos</artifactId>
|
2014-05-12 14:10:28 -04:00
|
|
|
<classifier>${mesos.classifier}</classifier>
|
2014-03-08 02:10:35 -05:00
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>io.netty</groupId>
|
|
|
|
<artifactId>netty-all</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.clearspring.analytics</groupId>
|
|
|
|
<artifactId>stream</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.codahale.metrics</groupId>
|
|
|
|
<artifactId>metrics-core</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.codahale.metrics</groupId>
|
|
|
|
<artifactId>metrics-jvm</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.codahale.metrics</groupId>
|
|
|
|
<artifactId>metrics-json</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>com.codahale.metrics</groupId>
|
|
|
|
<artifactId>metrics-graphite</artifactId>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.apache.derby</groupId>
|
|
|
|
<artifactId>derby</artifactId>
|
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
2014-04-04 23:36:24 -04:00
|
|
|
<dependency>
|
|
|
|
<groupId>org.tachyonproject</groupId>
|
|
|
|
<artifactId>tachyon</artifactId>
|
|
|
|
<version>0.4.1-thrift</version>
|
|
|
|
<exclusions>
|
|
|
|
<exclusion>
|
|
|
|
<groupId>org.apache.hadoop</groupId>
|
|
|
|
<artifactId>hadoop-client</artifactId>
|
|
|
|
</exclusion>
|
|
|
|
<exclusion>
|
|
|
|
<groupId>org.apache.curator</groupId>
|
|
|
|
<artifactId>curator-recipes</artifactId>
|
|
|
|
</exclusion>
|
|
|
|
<exclusion>
|
|
|
|
<groupId>org.eclipse.jetty</groupId>
|
|
|
|
<artifactId>jetty-jsp</artifactId>
|
|
|
|
</exclusion>
|
|
|
|
<exclusion>
|
|
|
|
<groupId>org.eclipse.jetty</groupId>
|
|
|
|
<artifactId>jetty-webapp</artifactId>
|
|
|
|
</exclusion>
|
|
|
|
<exclusion>
|
|
|
|
<groupId>org.eclipse.jetty</groupId>
|
|
|
|
<artifactId>jetty-server</artifactId>
|
|
|
|
</exclusion>
|
|
|
|
<exclusion>
|
|
|
|
<groupId>org.eclipse.jetty</groupId>
|
|
|
|
<artifactId>jetty-servlet</artifactId>
|
|
|
|
</exclusion>
|
|
|
|
<exclusion>
|
|
|
|
<groupId>junit</groupId>
|
|
|
|
<artifactId>junit</artifactId>
|
|
|
|
</exclusion>
|
|
|
|
<exclusion>
|
|
|
|
<groupId>org.powermock</groupId>
|
|
|
|
<artifactId>powermock-module-junit4</artifactId>
|
|
|
|
</exclusion>
|
|
|
|
<exclusion>
|
|
|
|
<groupId>org.powermock</groupId>
|
|
|
|
<artifactId>powermock-api-mockito</artifactId>
|
|
|
|
</exclusion>
|
|
|
|
<exclusion>
|
|
|
|
<groupId>org.apache.curator</groupId>
|
|
|
|
<artifactId>curator-test</artifactId>
|
|
|
|
</exclusion>
|
|
|
|
</exclusions>
|
|
|
|
</dependency>
|
2014-03-08 02:10:35 -05:00
|
|
|
<dependency>
|
|
|
|
<groupId>org.scalatest</groupId>
|
|
|
|
<artifactId>scalatest_${scala.binary.version}</artifactId>
|
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.mockito</groupId>
|
|
|
|
<artifactId>mockito-all</artifactId>
|
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.scalacheck</groupId>
|
|
|
|
<artifactId>scalacheck_${scala.binary.version}</artifactId>
|
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.easymock</groupId>
|
2014-06-06 14:45:21 -04:00
|
|
|
<artifactId>easymockclassextension</artifactId>
|
2014-03-08 02:10:35 -05:00
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
2014-06-20 23:05:12 -04:00
|
|
|
<dependency>
|
|
|
|
<groupId>asm</groupId>
|
|
|
|
<artifactId>asm</artifactId>
|
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
2014-03-08 02:10:35 -05:00
|
|
|
<dependency>
|
|
|
|
<groupId>com.novocode</groupId>
|
|
|
|
<artifactId>junit-interface</artifactId>
|
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
SPARK-1374: PySpark API for SparkSQL
An initial API that exposes SparkSQL functionality in PySpark. A PythonRDD composed of dictionaries, with string keys and primitive values (boolean, float, int, long, string) can be converted into a SchemaRDD that supports sql queries.
```
from pyspark.context import SQLContext
sqlCtx = SQLContext(sc)
rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"}, {"field1" : 2, "field2": "row2"}, {"field1" : 3, "field2": "row3"}])
srdd = sqlCtx.applySchema(rdd)
sqlCtx.registerRDDAsTable(srdd, "table1")
srdd2 = sqlCtx.sql("SELECT field1 AS f1, field2 as f2 from table1")
srdd2.collect()
```
The last line yields ```[{"f1" : 1, "f2" : "row1"}, {"f1" : 2, "f2": "row2"}, {"f1" : 3, "f2": "row3"}]```
Author: Ahir Reddy <ahirreddy@gmail.com>
Author: Michael Armbrust <michael@databricks.com>
Closes #363 from ahirreddy/pysql and squashes the following commits:
0294497 [Ahir Reddy] Updated log4j properties to suppress Hive Warns
307d6e0 [Ahir Reddy] Style fix
6f7b8f6 [Ahir Reddy] Temporary fix MIMA checker. Since we now assemble Spark jar with Hive, we don't want to check the interfaces of all of our hive dependencies
3ef074a [Ahir Reddy] Updated documentation because classes moved to sql.py
29245bf [Ahir Reddy] Cache underlying SchemaRDD instead of generating and caching PythonRDD
f2312c7 [Ahir Reddy] Moved everything into sql.py
a19afe4 [Ahir Reddy] Doc fixes
6d658ba [Ahir Reddy] Remove the metastore directory created by the HiveContext tests in SparkSQL
521ff6d [Ahir Reddy] Trying to get spark to build with hive
ab95eba [Ahir Reddy] Set SPARK_HIVE=true on jenkins
ded03e7 [Ahir Reddy] Added doc test for HiveContext
22de1d4 [Ahir Reddy] Fixed maven pyrolite dependency
e4da06c [Ahir Reddy] Display message if hive is not built into spark
227a0be [Michael Armbrust] Update API links. Fix Hive example.
58e2aa9 [Michael Armbrust] Build Docs for pyspark SQL Api. Minor fixes.
4285340 [Michael Armbrust] Fix building of Hive API Docs.
38a92b0 [Michael Armbrust] Add note to future non-python developers about python docs.
337b201 [Ahir Reddy] Changed com.clearspring.analytics stream version from 2.4.0 to 2.5.1 to match SBT build, and added pyrolite to maven build
40491c9 [Ahir Reddy] PR Changes + Method Visibility
1836944 [Michael Armbrust] Fix comments.
e00980f [Michael Armbrust] First draft of python sql programming guide.
b0192d3 [Ahir Reddy] Added Long, Double and Boolean as usable types + unit test
f98a422 [Ahir Reddy] HiveContexts
79621cf [Ahir Reddy] cleaning up cruft
b406ba0 [Ahir Reddy] doctest formatting
20936a5 [Ahir Reddy] Added tests and documentation
e4d21b4 [Ahir Reddy] Added pyrolite dependency
79f739d [Ahir Reddy] added more tests
7515ba0 [Ahir Reddy] added more tests :)
d26ec5e [Ahir Reddy] added test
e9f5b8d [Ahir Reddy] adding tests
906d180 [Ahir Reddy] added todo explaining cost of creating Row object in python
251f99d [Ahir Reddy] for now only allow dictionaries as input
09b9980 [Ahir Reddy] made jrdd explicitly lazy
c608947 [Ahir Reddy] SchemaRDD now has all RDD operations
725c91e [Ahir Reddy] awesome row objects
55d1c76 [Ahir Reddy] return row objects
4fe1319 [Ahir Reddy] output dictionaries correctly
be079de [Ahir Reddy] returning dictionaries works
cd5f79f [Ahir Reddy] Switched to using Scala SQLContext
e948bd9 [Ahir Reddy] yippie
4886052 [Ahir Reddy] even better
c0fb1c6 [Ahir Reddy] more working
043ca85 [Ahir Reddy] working
5496f9f [Ahir Reddy] doesn't crash
b8b904b [Ahir Reddy] Added schema rdd class
67ba875 [Ahir Reddy] java to python, and python to java
bcc0f23 [Ahir Reddy] Java to python
ab6025d [Ahir Reddy] compiling
2014-04-15 03:07:55 -04:00
|
|
|
<dependency>
|
|
|
|
<groupId>org.spark-project</groupId>
|
|
|
|
<artifactId>pyrolite</artifactId>
|
2014-04-22 12:44:41 -04:00
|
|
|
<version>2.0.1</version>
|
SPARK-1374: PySpark API for SparkSQL
An initial API that exposes SparkSQL functionality in PySpark. A PythonRDD composed of dictionaries, with string keys and primitive values (boolean, float, int, long, string) can be converted into a SchemaRDD that supports sql queries.
```
from pyspark.context import SQLContext
sqlCtx = SQLContext(sc)
rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"}, {"field1" : 2, "field2": "row2"}, {"field1" : 3, "field2": "row3"}])
srdd = sqlCtx.applySchema(rdd)
sqlCtx.registerRDDAsTable(srdd, "table1")
srdd2 = sqlCtx.sql("SELECT field1 AS f1, field2 as f2 from table1")
srdd2.collect()
```
The last line yields ```[{"f1" : 1, "f2" : "row1"}, {"f1" : 2, "f2": "row2"}, {"f1" : 3, "f2": "row3"}]```
Author: Ahir Reddy <ahirreddy@gmail.com>
Author: Michael Armbrust <michael@databricks.com>
Closes #363 from ahirreddy/pysql and squashes the following commits:
0294497 [Ahir Reddy] Updated log4j properties to suppress Hive Warns
307d6e0 [Ahir Reddy] Style fix
6f7b8f6 [Ahir Reddy] Temporary fix MIMA checker. Since we now assemble Spark jar with Hive, we don't want to check the interfaces of all of our hive dependencies
3ef074a [Ahir Reddy] Updated documentation because classes moved to sql.py
29245bf [Ahir Reddy] Cache underlying SchemaRDD instead of generating and caching PythonRDD
f2312c7 [Ahir Reddy] Moved everything into sql.py
a19afe4 [Ahir Reddy] Doc fixes
6d658ba [Ahir Reddy] Remove the metastore directory created by the HiveContext tests in SparkSQL
521ff6d [Ahir Reddy] Trying to get spark to build with hive
ab95eba [Ahir Reddy] Set SPARK_HIVE=true on jenkins
ded03e7 [Ahir Reddy] Added doc test for HiveContext
22de1d4 [Ahir Reddy] Fixed maven pyrolite dependency
e4da06c [Ahir Reddy] Display message if hive is not built into spark
227a0be [Michael Armbrust] Update API links. Fix Hive example.
58e2aa9 [Michael Armbrust] Build Docs for pyspark SQL Api. Minor fixes.
4285340 [Michael Armbrust] Fix building of Hive API Docs.
38a92b0 [Michael Armbrust] Add note to future non-python developers about python docs.
337b201 [Ahir Reddy] Changed com.clearspring.analytics stream version from 2.4.0 to 2.5.1 to match SBT build, and added pyrolite to maven build
40491c9 [Ahir Reddy] PR Changes + Method Visibility
1836944 [Michael Armbrust] Fix comments.
e00980f [Michael Armbrust] First draft of python sql programming guide.
b0192d3 [Ahir Reddy] Added Long, Double and Boolean as usable types + unit test
f98a422 [Ahir Reddy] HiveContexts
79621cf [Ahir Reddy] cleaning up cruft
b406ba0 [Ahir Reddy] doctest formatting
20936a5 [Ahir Reddy] Added tests and documentation
e4d21b4 [Ahir Reddy] Added pyrolite dependency
79f739d [Ahir Reddy] added more tests
7515ba0 [Ahir Reddy] added more tests :)
d26ec5e [Ahir Reddy] added test
e9f5b8d [Ahir Reddy] adding tests
906d180 [Ahir Reddy] added todo explaining cost of creating Row object in python
251f99d [Ahir Reddy] for now only allow dictionaries as input
09b9980 [Ahir Reddy] made jrdd explicitly lazy
c608947 [Ahir Reddy] SchemaRDD now has all RDD operations
725c91e [Ahir Reddy] awesome row objects
55d1c76 [Ahir Reddy] return row objects
4fe1319 [Ahir Reddy] output dictionaries correctly
be079de [Ahir Reddy] returning dictionaries works
cd5f79f [Ahir Reddy] Switched to using Scala SQLContext
e948bd9 [Ahir Reddy] yippie
4886052 [Ahir Reddy] even better
c0fb1c6 [Ahir Reddy] more working
043ca85 [Ahir Reddy] working
5496f9f [Ahir Reddy] doesn't crash
b8b904b [Ahir Reddy] Added schema rdd class
67ba875 [Ahir Reddy] java to python, and python to java
bcc0f23 [Ahir Reddy] Java to python
ab6025d [Ahir Reddy] compiling
2014-04-15 03:07:55 -04:00
|
|
|
</dependency>
|
[SPARK-1549] Add Python support to spark-submit
This PR updates spark-submit to allow submitting Python scripts (currently only with deploy-mode=client, but that's all that was supported before) and updates the PySpark code to properly find various paths, etc. One significant change is that we assume we can always find the Python files either from the Spark assembly JAR (which will happen with the Maven assembly build in make-distribution.sh) or from SPARK_HOME (which will exist in local mode even if you use sbt assembly, and should be enough for testing). This means we no longer need a weird hack to modify the environment for YARN.
This patch also updates the Python worker manager to run python with -u, which means unbuffered output (send it to our logs right away instead of waiting a while after stuff was written); this should simplify debugging.
In addition, it fixes https://issues.apache.org/jira/browse/SPARK-1709, setting the main class from a JAR's Main-Class attribute if not specified by the user, and fixes a few help strings and style issues in spark-submit.
In the future we may want to make the `pyspark` shell use spark-submit as well, but it seems unnecessary for 1.0.
Author: Matei Zaharia <matei@databricks.com>
Closes #664 from mateiz/py-submit and squashes the following commits:
15e9669 [Matei Zaharia] Fix some uses of path.separator property
051278c [Matei Zaharia] Small style fixes
0afe886 [Matei Zaharia] Add license headers
4650412 [Matei Zaharia] Add pyFiles to PYTHONPATH in executors, remove old YARN stuff, add tests
15f8e1e [Matei Zaharia] Set PYTHONPATH in PythonWorkerFactory in case it wasn't set from outside
47c0655 [Matei Zaharia] More work to make spark-submit work with Python:
d4375bd [Matei Zaharia] Clean up description of spark-submit args a bit and add Python ones
2014-05-06 18:12:35 -04:00
|
|
|
<dependency>
|
|
|
|
<groupId>net.sf.py4j</groupId>
|
|
|
|
<artifactId>py4j</artifactId>
|
|
|
|
<version>0.8.1</version>
|
|
|
|
</dependency>
|
2014-03-08 02:10:35 -05:00
|
|
|
</dependencies>
|
|
|
|
<build>
|
|
|
|
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
|
|
|
|
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
|
|
|
|
<plugins>
|
|
|
|
<plugin>
|
|
|
|
<groupId>org.scalatest</groupId>
|
|
|
|
<artifactId>scalatest-maven-plugin</artifactId>
|
|
|
|
<configuration>
|
|
|
|
<!-- Environment variables handed to the scalatest-maven-plugin test run:
     SPARK_HOME      is the parent directory of this module's basedir;
     SPARK_TESTING   is fixed to 1;
     SPARK_CLASSPATH takes the value of the spark.classpath property.
     NOTE(review): spark.classpath is not defined anywhere in this file; presumably it comes from the parent POM or the command line. Confirm. -->
<environmentVariables>
|
|
|
|
<SPARK_HOME>${basedir}/..</SPARK_HOME>
|
|
|
|
<SPARK_TESTING>1</SPARK_TESTING>
|
|
|
|
<SPARK_CLASSPATH>${spark.classpath}</SPARK_CLASSPATH>
|
|
|
|
</environmentVariables>
|
|
|
|
</configuration>
|
|
|
|
</plugin>
|
2014-04-30 02:24:34 -04:00
|
|
|
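<!-- During generate-resources, unzip the bundled py4j archive (../python/lib/py4j*.zip) into ../python/build so that its Python files can be packaged into the jar as resources. Requires an "unzip" executable on the PATH. -->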
|
|
|
|
<plugin>
|
|
|
|
<groupId>org.codehaus.mojo</groupId>
|
|
|
|
<artifactId>exec-maven-plugin</artifactId>
|
|
|
|
<version>1.2.1</version>
|
|
|
|
<executions>
|
|
|
|
<execution>
|
|
|
|
<phase>generate-resources</phase>
|
|
|
|
<goals>
|
|
|
|
<goal>exec</goal>
|
|
|
|
</goals>
|
|
|
|
</execution>
|
|
|
|
</executions>
|
|
|
|
<configuration>
|
|
|
|
<executable>unzip</executable>
|
|
|
|
<workingDirectory>../python</workingDirectory>
|
|
|
|
<arguments>
|
|
|
|
<argument>-o</argument>
|
|
|
|
<argument>lib/py4j*.zip</argument>
|
|
|
|
<argument>-d</argument>
|
|
|
|
<argument>build</argument>
|
|
|
|
</arguments>
|
|
|
|
</configuration>
|
|
|
|
</plugin>
|
2014-03-08 02:10:35 -05:00
|
|
|
</plugins>
|
2014-05-12 14:10:28 -04:00
|
|
|
|
2014-04-30 02:24:34 -04:00
|
|
|
<resources>
|
|
|
|
<resource>
|
|
|
|
<directory>src/main/resources</directory>
|
|
|
|
</resource>
|
|
|
|
<!-- Package the PySpark sources as resources. The pattern pyspark/*.py matches only .py files directly under ../python/pyspark; a single * does not descend into subdirectories. -->
<resource>
|
|
|
|
<directory>../python</directory>
|
|
|
|
<includes>
|
|
|
|
<include>pyspark/*.py</include>
|
|
|
|
</includes>
|
|
|
|
</resource>
|
|
|
|
<!-- Package py4j's Python sources as resources. ../python/build is the directory that the exec-maven-plugin execution in this file unzips lib/py4j*.zip into during generate-resources. Only .py files directly under py4j/ match this pattern. -->
<resource>
|
|
|
|
<directory>../python/build</directory>
|
|
|
|
<includes>
|
|
|
|
<include>py4j/*.py</include>
|
|
|
|
</includes>
|
|
|
|
</resource>
|
|
|
|
</resources>
|
2014-03-08 02:10:35 -05:00
|
|
|
</build>
|
2013-12-31 22:30:08 -05:00
|
|
|
</project>
|