5.6 KiB
5.6 KiB
title | author |
---|---|
CSTA @nalytics Workshop | Oliver Kennedy |
Oliver is presenting a workshop on Data @nalytics at the WNY-CSTA Fall Conference. Hello to all the high-school teachers in attendance!
The solution developed in class:
# Fetch (and cache on disk) the follower IDs of one Twitter account.
#
# Only syntax that is valid in both Python 2 and Python 3 is used: every
# print is written as a single parenthesised expression.

import sys
import twitter
import pickle
import time
from cache import read_cache, write_cache
from keys import consumer_key, consumer_secret, access_token, access_token_secret

api = twitter.Api(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token_key=access_token,
    access_token_secret=access_token_secret,
)

#print api.VerifyCredentials();

# Rate limit status


def followers(user_id):
    """Return the follower IDs of ``user_id``, using the cache when possible.

    The cache entry is named ``<user_id>_followers``. When reading it fails,
    the IDs are fetched with ``api.GetFollowerIDs`` (after sleeping for
    however long ``api.GetSleepTime`` reports) and written back to the cache.
    """
    cache_name = str(user_id) + "_followers"
    try:
        ids = read_cache(cache_name)
    except Exception:
        # Presumably read_cache raises when there is no usable entry -- its
        # implementation lives in the external `cache` module.  Catching
        # Exception (not a bare except) keeps Ctrl-C working during a crawl.
        sleep_time = api.GetSleepTime("followers/ids")
        if sleep_time != 0:
            print("Goodnight for "+str(sleep_time)+" seconds")
            # The original called a bare sleep(), which is undefined here
            # because only the `time` module itself is imported.
            time.sleep(sleep_time)
        ids = api.GetFollowerIDs(user_id)
        write_cache(cache_name, ids)
    else:
        print("Cached results for "+str(user_id))
    return ids


print(followers(45606271))
My complete solution:
# Crawl the follower graph around one Twitter account (breadth-first, capped
# at 300 users), compute hop distances between the crawled users, and report
# the user with the smallest average distance to the others.
#
# Only syntax that is valid in both Python 2 and Python 3 is used: every
# print is written as a single parenthesised expression.

import sys
import twitter
import pickle
import time
from collections import deque
from keys import consumer_key, consumer_secret, access_token, access_token_secret

api = twitter.Api(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token_key=access_token,
    access_token_secret=access_token_secret,
)

#print api.VerifyCredentials();

# Rate limit status
#print api.GetRateLimitStatus();

# Average time to sleep per request to avoid exceeding the threshold.
# or 0 if the threshold has been reached
sleep_time = max(
    api.GetAverageSleepTime('followers/ids'),
    api.GetAverageSleepTime('friends/ids')
)
print("Will probably need to sleep "+str(sleep_time)+" seconds per request")

# Time required to sleep per request if the threshold has been reached
# or 0 if the threshold has not been reached
#print api.GetSleepTime('followers/ids')
#print api.GetSleepTime('friends/ids')

#print api.GetFollowerIDs(45606271);
#print api.GetFriendIDs(45606271);


def read_cache(cname):
    """Load and return the pickled value stored in ``cache/<cname>``.

    Any error from ``open`` or ``pickle.load`` (missing file, corrupt data)
    propagates to the caller.
    """
    # NOTE: unpickling can execute arbitrary code; only load cache files that
    # this script wrote itself.
    # Binary mode is what pickle expects (and is required on Python 3).
    with open("cache/" + cname, 'rb') as f:
        return pickle.load(f)


def write_cache(cname, value):
    """Pickle ``value`` into ``cache/<cname>`` and return ``value``.

    The ``cache`` directory must already exist.
    """
    with open("cache/" + cname, 'wb') as f:
        pickle.dump(value, f)
    return value


def sleep_for(time_to_sleep):
    """Sleep for ``time_to_sleep`` seconds while drawing a progress bar.

    A row of ``_`` (one per 15-second slice) is printed first, then one ``=``
    is written after each slice has elapsed.  Does nothing when
    ``time_to_sleep`` is not positive.
    """
    if time_to_sleep > 0:
        sleep_interval = 15
        # Ceiling division: number of slices needed to cover the whole wait.
        print("_" * int((time_to_sleep + sleep_interval - 1) // sleep_interval))
        sys.stdout.flush()
        while time_to_sleep > 0:
            time.sleep(min(time_to_sleep, sleep_interval))
            sys.stdout.write("=")
            sys.stdout.flush()
            time_to_sleep -= sleep_interval
        print("")


def followers(uid):
    """Return the follower IDs of ``uid``, reading the disk cache first.

    On a cache miss the IDs are fetched with ``api.GetFollowerIDs`` (at most
    100), cached, and returned.  A ``twitter.TwitterError`` whose text is
    exactly "Not authorized." is cached as an empty list; any other
    ``TwitterError`` is re-raised.
    """
    cache_name = str(uid) + "_followers"
    try:
        ret = read_cache(cache_name)
        # print "followers of user " + str(uid) + " are cached";
        return ret
    except Exception:
        # Missing or unreadable cache entry: fall through and fetch.
        # Exception (not a bare except) so Ctrl-C still interrupts the crawl.
        pass

    sleep_time = api.GetSleepTime('followers/ids')
    if sleep_time > 0:
        # Pad the wait reported by the API by an extra 30 seconds.
        sleep_time += 30
    print("Need to fetch followers of "+str(uid)+"; sleeping for "+str(sleep_time))
    sleep_for(sleep_time)
    try:
        return write_cache(
            cache_name,
            api.GetFollowerIDs(uid, count=100, total_count=100)
        )
    except twitter.TwitterError as e:
        print("Caught: " + str(e))
        if str(e) == "Not authorized.":
            # Remember the failure as "no followers" so it is not retried.
            return write_cache(cache_name, [])
        # Bare raise keeps the original traceback.
        raise


#def friends(uid):
#  try:
#    ret = read_cache(str(uid)+"_friends");
#    print "friends of user " + str(uid) + " are cached";
#    return ret;
#  except:
#    sleep_time = api.GetSleepTime('friends/ids')
#    print "Need to fetch friends of "+str(uid)+"; sleeping for "+str(sleep_time)
#    time.sleep(sleep_time);
#    return write_cache(
#      str(uid)+"_friends",
#      api.GetFriendIDs(uid, count = 200)
#    )


def connected(uid):
    """Return the set of user IDs connected to ``uid`` (followers only)."""
    return set(followers(uid))  # | set(friends(uid))


# --- Breadth-first crawl, starting from `me`, capped at 300 users ---------
me = 45606271
completed = set()
todo = deque([me])  # deque: O(1) pops from the front of the queue

while todo and len(completed) < 300:
    current = todo.popleft()
    if current not in completed:
        # The original also computed connected(current) into a variable that
        # was never read; followers() alone fetches and caches the same data.
        # print("Connections for "+str(current)+": "+str(connected(current)));
        todo.extend(followers(current))
        completed.add(current)

print("Completed: " + str(completed))

# --- Undirected adjacency map: users[a][b] == 1 when one follows the other -
users = dict()
for u in completed:
    u_followers = set(followers(u)) & completed
    users.setdefault(u, dict())
    for f in u_followers:
        users[u][f] = 1
        users.setdefault(f, dict())[u] = 1

# --- Relax two-leg paths until nothing improves (at most 9 rounds) ---------
depth = 1
made_a_change = True
while made_a_change and depth < 10:
    made_a_change = False
    print("Running round "+str(depth))
    depth = depth+1
    for source in completed:
        # Snapshot the keys: entries are added to these dicts inside the loop.
        for mid in list(users[source]):
            for dest in list(users[mid]):
                if dest == source:
                    # source -> mid -> source is not a real path; recording
                    # it would add a bogus self-distance of 2 to every
                    # user's average.
                    continue
                new_path_length = int(users[source][mid]) + int(users[mid][dest])
                if dest not in users[source] or users[source][dest] > new_path_length:
                    print("Better path ("+str(new_path_length)+" from " + str(source) + " to " + str(dest) + " through " + str(mid))
                    users[source][dest] = new_path_length
                    made_a_change = True

print(users)

# --- Pick the user (other than `me`) with the lowest average distance ------
min_dist = 1000
min_user = None
for u, distances in users.items():
    if not distances:
        # No known paths from this user: an average is undefined.
        continue
    tot = sum(distances.values())
    avg = float(tot) / float(len(distances))
    print("Average distance for " + str(u) + " : " + str(avg))
    if avg < min_dist and int(u) != int(me):
        min_dist = avg
        min_user = u

print("The most central user is "+str(min_user)+" with an average bacon-distance of "+str(min_dist))

if min_user is not None:
    # Only look the user up when the crawl actually produced a candidate.
    user = api.GetUser(user_id=min_user)
    print("The user's name is "+user.name + " a.k.a. @"+user.screen_name)