2012-08-10 04:10:02 -04:00
|
|
|
import sys
|
|
|
|
from random import Random
|
2013-01-01 16:52:14 -05:00
|
|
|
|
|
|
|
from pyspark import SparkContext
|
2012-08-10 04:10:02 -04:00
|
|
|
|
|
|
|
# Size of the randomly generated input graph.
numVertices = 100
numEdges = 200
# Fixed seed, so every run draws the same sequence of random numbers.
rand = Random(42)
|
|
|
|
|
|
|
|
|
|
|
|
def generateGraph():
    """Build a random directed graph as a set of (src, dst) edges.

    Endpoints are drawn from the module-level seeded ``rand`` until
    ``numEdges`` distinct edges have been collected. Vertex ids lie in
    ``[0, numVertices)`` and self-loops are rejected.

    Returns:
        A set of ``numEdges`` distinct ``(src, dst)`` integer tuples.

    Raises:
        ValueError: if ``numEdges`` exceeds the number of distinct
            non-loop edges that ``numVertices`` vertices can form.
    """
    # Without this check an unsatisfiable edge count would loop forever.
    maxEdges = numVertices * (numVertices - 1)
    if numEdges > maxEdges:
        raise ValueError(
            "cannot generate %d distinct edges over %d vertices (max %d)"
            % (numEdges, numVertices, maxEdges))

    edges = set()
    while len(edges) < numEdges:
        # Endpoints are vertex ids, so the bound is the vertex count,
        # not the edge count.
        src = rand.randrange(0, numVertices)
        dst = rand.randrange(0, numVertices)
        if src != dst:
            edges.add((src, dst))
    return edges
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Usage: PythonTC <master> [<slices>]
    #   <master>  Spark master URL passed to SparkContext (required).
    #   <slices>  number of partitions for the input graph (default 2).
    if len(sys.argv) == 1:
        # Written via sys.stderr.write so the line parses on both
        # Python 2 and Python 3; the emitted text is unchanged.
        sys.stderr.write("Usage: PythonTC <master> [<slices>]\n")
        # sys.exit rather than the site-injected exit() builtin, which is
        # not available when the interpreter runs without the site module.
        sys.exit(-1)
    sc = SparkContext(sys.argv[1], "PythonTC")
    slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
    tc = sc.parallelize(generateGraph(), slices).cache()

    # Linear transitive closure: each round grows paths by one edge,
    # by joining the graph's edges with the already-discovered paths.
    # e.g. join the path (y, z) from the TC with the edge (x, y) from
    # the graph to obtain the path (x, z).

    # Because join() joins on keys, the edges are stored in reversed order,
    # i.e. an edge (x, y) is kept as (y, x) so that it is keyed by y.
    edges = tc.map(lambda edge: (edge[1], edge[0]))

    nextCount = tc.count()
    while True:
        oldCount = nextCount
        # Perform the join, obtaining an RDD of (y, (z, x)) pairs,
        # then project the result to obtain the new (x, z) paths.
        new_edges = tc.join(edges).map(
            lambda joined: (joined[1][1], joined[1][0]))
        tc = tc.union(new_edges).distinct().cache()
        nextCount = tc.count()
        # Fixed point: a full round added no new path, so the closure
        # is complete.
        if nextCount == oldCount:
            break

    # nextCount already holds the size of the final closure, so there is
    # no need to launch another count job.
    print("TC has %i edges" % nextCount)
|