[SPARK-9812] [STREAMING] Fix Python 3 compatibility issue in PySpark Streaming and some docs
This PR includes the following fixes: 1. Use `range` instead of `xrange` in `queue_stream.py` to support Python 3. 2. Fix the issue that `utf8_decoder` will return `bytes` rather than `str` when receiving an empty `bytes` in Python 3. 3. Fix the commands in docs so that the user can copy them directly to the command line. The previous commands were broken in the middle of a path, so when copying to the command line, the path would be split into two parts by the extra spaces, which forced the user to fix it manually. Author: zsxwing <zsxwing@gmail.com> Closes #8315 from zsxwing/SPARK-9812.
This commit is contained in:
parent
2f2686a73f
commit
1f29d502e7
|
@ -23,8 +23,8 @@
|
|||
http://kafka.apache.org/documentation.html#quickstart
|
||||
|
||||
and then run the example
|
||||
`$ bin/spark-submit --jars external/kafka-assembly/target/scala-*/\
|
||||
spark-streaming-kafka-assembly-*.jar \
|
||||
`$ bin/spark-submit --jars \
|
||||
external/kafka-assembly/target/scala-*/spark-streaming-kafka-assembly-*.jar \
|
||||
examples/src/main/python/streaming/direct_kafka_wordcount.py \
|
||||
localhost:9092 test`
|
||||
"""
|
||||
|
@ -37,7 +37,7 @@ from pyspark.streaming.kafka import KafkaUtils
|
|||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) != 3:
|
||||
print >> sys.stderr, "Usage: direct_kafka_wordcount.py <broker_list> <topic>"
|
||||
print("Usage: direct_kafka_wordcount.py <broker_list> <topic>", file=sys.stderr)
|
||||
exit(-1)
|
||||
|
||||
sc = SparkContext(appName="PythonStreamingDirectKafkaWordCount")
|
||||
|
|
|
@ -23,8 +23,9 @@
|
|||
https://flume.apache.org/documentation.html
|
||||
|
||||
and then run the example
|
||||
`$ bin/spark-submit --jars external/flume-assembly/target/scala-*/\
|
||||
spark-streaming-flume-assembly-*.jar examples/src/main/python/streaming/flume_wordcount.py \
|
||||
`$ bin/spark-submit --jars \
|
||||
external/flume-assembly/target/scala-*/spark-streaming-flume-assembly-*.jar \
|
||||
examples/src/main/python/streaming/flume_wordcount.py \
|
||||
localhost 12345
|
||||
"""
|
||||
from __future__ import print_function
|
||||
|
|
|
@ -23,8 +23,9 @@
|
|||
http://kafka.apache.org/documentation.html#quickstart
|
||||
|
||||
and then run the example
|
||||
`$ bin/spark-submit --jars external/kafka-assembly/target/scala-*/\
|
||||
spark-streaming-kafka-assembly-*.jar examples/src/main/python/streaming/kafka_wordcount.py \
|
||||
`$ bin/spark-submit --jars \
|
||||
external/kafka-assembly/target/scala-*/spark-streaming-kafka-assembly-*.jar \
|
||||
examples/src/main/python/streaming/kafka_wordcount.py \
|
||||
localhost:2181 test`
|
||||
"""
|
||||
from __future__ import print_function
|
||||
|
|
|
@ -26,8 +26,9 @@
|
|||
http://www.eclipse.org/paho/#getting-started
|
||||
|
||||
and then run the example
|
||||
`$ bin/spark-submit --jars external/mqtt-assembly/target/scala-*/\
|
||||
spark-streaming-mqtt-assembly-*.jar examples/src/main/python/streaming/mqtt_wordcount.py \
|
||||
`$ bin/spark-submit --jars \
|
||||
external/mqtt-assembly/target/scala-*/spark-streaming-mqtt-assembly-*.jar \
|
||||
examples/src/main/python/streaming/mqtt_wordcount.py \
|
||||
tcp://localhost:1883 foo`
|
||||
"""
|
||||
|
||||
|
|
|
@ -36,8 +36,8 @@ if __name__ == "__main__":
|
|||
# Create the queue through which RDDs can be pushed to
|
||||
# a QueueInputDStream
|
||||
rddQueue = []
|
||||
for i in xrange(5):
|
||||
rddQueue += [ssc.sparkContext.parallelize([j for j in xrange(1, 1001)], 10)]
|
||||
for i in range(5):
|
||||
rddQueue += [ssc.sparkContext.parallelize([j for j in range(1, 1001)], 10)]
|
||||
|
||||
# Create the QueueInputDStream and use it do some processing
|
||||
inputStream = ssc.queueStream(rddQueue)
|
||||
|
|
|
@ -31,7 +31,9 @@ __all__ = ['FlumeUtils', 'utf8_decoder']
|
|||
|
||||
def utf8_decoder(s):
|
||||
""" Decode the unicode as UTF-8 """
|
||||
return s and s.decode('utf-8')
|
||||
if s is None:
|
||||
return None
|
||||
return s.decode('utf-8')
|
||||
|
||||
|
||||
class FlumeUtils(object):
|
||||
|
|
|
@ -29,7 +29,9 @@ __all__ = ['Broker', 'KafkaUtils', 'OffsetRange', 'TopicAndPartition', 'utf8_dec
|
|||
|
||||
def utf8_decoder(s):
|
||||
""" Decode the unicode as UTF-8 """
|
||||
return s and s.decode('utf-8')
|
||||
if s is None:
|
||||
return None
|
||||
return s.decode('utf-8')
|
||||
|
||||
|
||||
class KafkaUtils(object):
|
||||
|
|
|
@ -26,7 +26,9 @@ __all__ = ['KinesisUtils', 'InitialPositionInStream', 'utf8_decoder']
|
|||
|
||||
def utf8_decoder(s):
|
||||
""" Decode the unicode as UTF-8 """
|
||||
return s and s.decode('utf-8')
|
||||
if s is None:
|
||||
return None
|
||||
return s.decode('utf-8')
|
||||
|
||||
|
||||
class KinesisUtils(object):
|
||||
|
|
Loading…
Reference in a new issue