2013-12-31 03:28:57 -05:00
|
|
|
<?xml version="1.0" encoding="UTF-8"?>
|
|
|
|
<!--
|
|
|
|
~ Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
~ contributor license agreements. See the NOTICE file distributed with
|
|
|
|
~ this work for additional information regarding copyright ownership.
|
|
|
|
~ The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
~ (the "License"); you may not use this file except in compliance with
|
|
|
|
~ the License. You may obtain a copy of the License at
|
|
|
|
~
|
|
|
|
~ http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
~
|
|
|
|
~ Unless required by applicable law or agreed to in writing, software
|
|
|
|
~ distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
~ See the License for the specific language governing permissions and
|
|
|
|
~ limitations under the License.
|
|
|
|
-->
|
|
|
|
|
|
|
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
|
|
<modelVersion>4.0.0</modelVersion>
|
|
|
|
<parent>
|
|
|
|
<groupId>org.apache.spark</groupId>
|
2015-03-05 14:31:48 -05:00
|
|
|
<artifactId>spark-parent_2.10</artifactId>
|
2015-03-20 14:43:57 -04:00
|
|
|
<version>1.4.0-SNAPSHOT</version>
|
2013-12-31 03:28:57 -05:00
|
|
|
<relativePath>../../pom.xml</relativePath>
|
|
|
|
</parent>
|
|
|
|
|
|
|
|
<groupId>org.apache.spark</groupId>
|
|
|
|
<artifactId>spark-streaming-kafka_2.10</artifactId>
|
2014-07-10 14:03:37 -04:00
|
|
|
<properties>
|
2014-07-28 15:07:30 -04:00
|
|
|
<sbt.project.name>streaming-kafka</sbt.project.name>
|
2014-07-10 14:03:37 -04:00
|
|
|
</properties>
|
2013-12-31 03:28:57 -05:00
|
|
|
<packaging>jar</packaging>
|
|
|
|
<name>Spark Project External Kafka</name>
|
2014-03-02 04:00:16 -05:00
|
|
|
<url>http://spark.apache.org/</url>
|
2013-12-31 03:28:57 -05:00
|
|
|
|
|
|
|
<dependencies>
|
|
|
|
<dependency>
|
|
|
|
<groupId>org.apache.spark</groupId>
|
|
|
|
<artifactId>spark-streaming_${scala.binary.version}</artifactId>
|
|
|
|
<version>${project.version}</version>
|
2014-11-19 17:18:10 -05:00
|
|
|
<scope>provided</scope>
|
2013-12-31 03:28:57 -05:00
|
|
|
</dependency>
|
|
|
|
<dependency>
|
2014-01-09 19:53:59 -05:00
|
|
|
<groupId>org.apache.kafka</groupId>
|
2013-12-31 03:28:57 -05:00
|
|
|
<artifactId>kafka_${scala.binary.version}</artifactId>
|
[SPARK-4964] [Streaming] Exactly-once semantics for Kafka
Author: cody koeninger <cody@koeninger.org>
Closes #3798 from koeninger/kafkaRdd and squashes the following commits:
1dc2941 [cody koeninger] [SPARK-4964] silence ConsumerConfig warnings about broker connection props
59e29f6 [cody koeninger] [SPARK-4964] settle on "Direct" as a naming convention for the new stream
8c31855 [cody koeninger] [SPARK-4964] remove HasOffsetRanges interface from return types
0df3ebe [cody koeninger] [SPARK-4964] add comments per pwendell / dibbhatt
8991017 [cody koeninger] [SPARK-4964] formatting
825110f [cody koeninger] [SPARK-4964] rename stuff per TD
4354bce [cody koeninger] [SPARK-4964] per td, remove java interfaces, replace with final classes, corresponding changes to KafkaRDD constructor and checkpointing
9adaa0a [cody koeninger] [SPARK-4964] formatting
0090553 [cody koeninger] [SPARK-4964] javafication of interfaces
9a838c2 [cody koeninger] [SPARK-4964] code cleanup, add more tests
2b340d8 [cody koeninger] [SPARK-4964] refactor per TD feedback
80fd6ae [cody koeninger] [SPARK-4964] Rename createExactlyOnceStream so it isnt over-promising, change doc
99d2eba [cody koeninger] [SPARK-4964] Reduce level of nesting. If beginning is past end, its actually an error (may happen if Kafka topic was deleted and recreated)
19406cc [cody koeninger] Merge branch 'master' of https://github.com/apache/spark into kafkaRdd
2e67117 [cody koeninger] [SPARK-4964] one potential way of hiding most of the implementation, while still allowing access to offsets (but not subclassing)
bb80bbe [cody koeninger] [SPARK-4964] scalastyle line length
d4a7cf7 [cody koeninger] [SPARK-4964] allow for use cases that need to override compute for custom kafka dstreams
c1bd6d9 [cody koeninger] [SPARK-4964] use newly available attemptNumber for correct retry behavior
548d529 [cody koeninger] Merge branch 'master' of https://github.com/apache/spark into kafkaRdd
0458e4e [cody koeninger] [SPARK-4964] recovery of generated rdds from checkpoint
e86317b [cody koeninger] [SPARK-4964] try seed brokers in random order to spread metadata requests
e93eb72 [cody koeninger] [SPARK-4964] refactor to add preferredLocations. depends on SPARK-4014
356c7cc [cody koeninger] [SPARK-4964] code cleanup per helena
adf99a6 [cody koeninger] [SPARK-4964] fix serialization issues for checkpointing
1d50749 [cody koeninger] [SPARK-4964] code cleanup per tdas
8bfd6c0 [cody koeninger] [SPARK-4964] configure rate limiting via spark.streaming.receiver.maxRate
e09045b [cody koeninger] [SPARK-4964] add foreachPartitionWithIndex, to avoid doing equivalent map + empty foreach boilerplate
cac63ee [cody koeninger] additional testing, fix fencepost error
37d3053 [cody koeninger] make KafkaRDDPartition available to users so offsets can be committed per partition
bcca8a4 [cody koeninger] Merge branch 'master' of https://github.com/apache/spark into kafkaRdd
6bf14f2 [cody koeninger] first attempt at a Kafka dstream that allows for exactly-once semantics
326ff3c [cody koeninger] add some tests
38bb727 [cody koeninger] give easy access to the parameters of a KafkaRDD
979da25 [cody koeninger] dont allow empty leader offsets to be returned
8d7de4a [cody koeninger] make sure leader offsets can be found even for leaders that arent in the seed brokers
4b078bf [cody koeninger] differentiate between leader and consumer offsets in error message
3c2a96a [cody koeninger] fix scalastyle errors
29c6b43 [cody koeninger] cleanup logging
783b477 [cody koeninger] update tests for kafka 8.1.1
7d050bc [cody koeninger] methods to set consumer offsets and get topic metadata, switch back to inclusive start / exclusive end to match typical kafka consumer behavior
ce91c59 [cody koeninger] method to get consumer offsets, explicit error handling
4dafd1b [cody koeninger] method to get leader offsets, switch rdd bound to being exclusive start, inclusive end to match offsets typically returned from cluster
0b94b33 [cody koeninger] use dropWhile rather than filter to trim beginning of fetch response
1d70625 [cody koeninger] WIP on kafka cluster
76913e2 [cody koeninger] Batch oriented kafka rdd, WIP. todo: cluster metadata / finding leader
2015-02-04 15:06:34 -05:00
|
|
|
<version>0.8.1.1</version>
|
2013-12-31 03:28:57 -05:00
|
|
|
<exclusions>
|
|
|
|
<exclusion>
|
|
|
|
<groupId>com.sun.jmx</groupId>
|
|
|
|
<artifactId>jmxri</artifactId>
|
|
|
|
</exclusion>
|
|
|
|
<exclusion>
|
|
|
|
<groupId>com.sun.jdmk</groupId>
|
|
|
|
<artifactId>jmxtools</artifactId>
|
|
|
|
</exclusion>
|
|
|
|
<exclusion>
|
|
|
|
<groupId>net.sf.jopt-simple</groupId>
|
|
|
|
<artifactId>jopt-simple</artifactId>
|
|
|
|
</exclusion>
|
SPARK-1071: Tidy logging strategy and use of log4j
Prompted by a recent thread on the mailing list, I tried and failed to see if Spark can be made independent of log4j. There are a few cases where control of the underlying logging is pretty useful, and to do that, you have to bind to a specific logger.
Instead I propose some tidying that leaves Spark's use of log4j, but gets rid of warnings and should still enable downstream users to switch. The idea is to pipe everything (except log4j) through SLF4J, and have Spark use SLF4J directly when logging, and where Spark needs to output info (REPL and tests), bind from SLF4J to log4j.
This leaves the same behavior in Spark. It means that downstream users who want to use something except log4j should:
- Exclude dependencies on log4j, slf4j-log4j12 from Spark
- Include dependency on log4j-over-slf4j
- Include dependency on another logger X, and another slf4j-X
- Recreate any log config that Spark does, that is needed, in the other logger's config
That sounds about right.
Here are the key changes:
- Include the jcl-over-slf4j shim everywhere by depending on it in core.
- Exclude dependencies on commons-logging from third-party libraries.
- Include the jul-to-slf4j shim everywhere by depending on it in core.
- Exclude slf4j-* dependencies from third-party libraries to prevent collision or warnings
- Added missing slf4j-log4j12 binding to GraphX, Bagel module tests
And minor/incidental changes:
- Update to SLF4J 1.7.5, which happily matches Hadoop 2’s version and is a recommended update over 1.7.2
- (Remove a duplicate HBase dependency declaration in SparkBuild.scala)
- (Remove a duplicate mockito dependency declaration that was causing warnings and bugging me)
Author: Sean Owen <sowen@cloudera.com>
Closes #570 from srowen/SPARK-1071 and squashes the following commits:
52eac9f [Sean Owen] Add slf4j-over-log4j12 dependency to core (non-test) and remove it from things that depend on core.
77a7fa9 [Sean Owen] SPARK-1071: Tidy logging strategy and use of log4j
2014-02-23 14:40:55 -05:00
|
|
|
<exclusion>
|
|
|
|
<groupId>org.slf4j</groupId>
|
|
|
|
<artifactId>slf4j-simple</artifactId>
|
|
|
|
</exclusion>
|
2014-08-06 02:41:34 -04:00
|
|
|
<exclusion>
|
|
|
|
<groupId>org.apache.zookeeper</groupId>
|
|
|
|
<artifactId>zookeeper</artifactId>
|
|
|
|
</exclusion>
|
2013-12-31 03:28:57 -05:00
|
|
|
</exclusions>
|
|
|
|
</dependency>
|
2014-08-05 13:40:28 -04:00
|
|
|
<dependency>
|
|
|
|
<groupId>net.sf.jopt-simple</groupId>
|
|
|
|
<artifactId>jopt-simple</artifactId>
|
|
|
|
<version>3.2</version>
|
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
2013-12-31 03:28:57 -05:00
|
|
|
<dependency>
|
|
|
|
<groupId>org.scalacheck</groupId>
|
|
|
|
<artifactId>scalacheck_${scala.binary.version}</artifactId>
|
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
2014-07-30 18:04:33 -04:00
|
|
|
<dependency>
|
|
|
|
<groupId>junit</groupId>
|
|
|
|
<artifactId>junit</artifactId>
|
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
2013-12-31 03:28:57 -05:00
|
|
|
<dependency>
|
|
|
|
<groupId>com.novocode</groupId>
|
|
|
|
<artifactId>junit-interface</artifactId>
|
|
|
|
<scope>test</scope>
|
|
|
|
</dependency>
|
|
|
|
</dependencies>
|
|
|
|
<build>
|
|
|
|
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
|
|
|
|
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
|
|
|
|
</build>
|
|
|
|
</project>
|