Website/src/news/2014-10-17-csta-data-nalyti...

5.6 KiB

title author
CSTA @nalytics Workshop Oliver Kennedy

Oliver is presenting a workshop on Data @nalytics at the WNY-CSTA Fall Conference. Hello to all the high-school teachers in attendance!

The solution developed in class:

import sys;
import twitter;
import pickle;
import time;
from cache import read_cache, write_cache;
from keys import consumer_key, consumer_secret, access_token, access_token_secret;

# Twitter client used by everything below; the four credentials are the
# names imported from the `keys` module.
api = twitter.Api(
  consumer_key=consumer_key,
  consumer_secret=consumer_secret,
  access_token_key=access_token,
  access_token_secret=access_token_secret,
)

#print api.VerifyCredentials();
# Rate limit status

def followers(user_id):
  """Return the follower IDs of `user_id`, preferring the local cache.

  The cache entry is named "<user_id>_followers". On a cache miss the IDs
  are fetched with `api.GetFollowerIDs`, after first sleeping for however
  long `api.GetSleepTime("followers/ids")` reports, and the result is
  written back to the cache before being returned.

  Errors raised by the API call or by `write_cache` propagate to the caller.
  """
  cache_key = str(user_id) + "_followers"
  try:
    cached = read_cache(cache_key)
  except Exception:
    # Any failure to read the entry (missing or unreadable) is treated as a
    # cache miss. `Exception` rather than a bare `except:` so that Ctrl-C
    # and SystemExit are not swallowed.
    pass
  else:
    print("Cached results for " + str(user_id))
    return cached

  sleep_time = api.GetSleepTime("followers/ids")
  if sleep_time != 0:
    print("Goodnight for " + str(sleep_time) + " seconds")
    # Only the `time` module is imported, so the call must be qualified;
    # a bare `sleep(...)` raises NameError.
    time.sleep(sleep_time)
  fetched = api.GetFollowerIDs(user_id)
  write_cache(cache_key, fetched)
  return fetched

# Try it out: fetch (or load from the cache) and show one account's followers.
print(followers(45606271))

My complete solution:

import sys;
import twitter;
import pickle;
import time;
from keys import consumer_key, consumer_secret, access_token, access_token_secret;

# Twitter client used by everything below; the four credentials are the
# names imported from the `keys` module.
api = twitter.Api(
  consumer_key=consumer_key,
  consumer_secret=consumer_secret,
  access_token_key=access_token,
  access_token_secret=access_token_secret,
)

#print api.VerifyCredentials();

# Rate limit status
#print api.GetRateLimitStatus();

# Average time to sleep per request to avoid exceeding the threshold.
# or 0 if the threshold has been reached
# Take the larger of the two per-endpoint averages so the estimate covers
# whichever endpoint is more constrained.
sleep_time = max(
  api.GetAverageSleepTime(endpoint)
  for endpoint in ('followers/ids', 'friends/ids')
)

print("Will probably need to sleep " + str(sleep_time) + " seconds per request")

# Time required to sleep per request if the threshold has been reached
# or 0 if the threshold has not been reached
#print api.GetSleepTime('followers/ids')
#print api.GetSleepTime('friends/ids')

#print api.GetFollowerIDs(45606271);
#print api.GetFriendIDs(45606271);


def read_cache(cname):
  """Load and return the pickled value stored as "cache/<cname>".

  Any error from `open` (for example, the entry does not exist) or from
  `pickle.load` (the entry is not a valid pickle) propagates to the caller.

  NOTE: unpickling can execute arbitrary code, so the cache directory must
  only ever contain files written by this script.
  """
  path = "cache/" + cname
  # Pickle data is bytes: it has to be read through a binary-mode handle.
  # The `with` block closes the file, so no explicit close() is needed.
  with open(path, 'rb') as f:
    return pickle.load(f)

def write_cache(cname, value):
  """Pickle `value` into "cache/<cname>" and return `value` unchanged.

  Returning the value lets callers write `return write_cache(key, fetch())`.
  The cache/ directory must already exist; errors from `open` or
  `pickle.dump` propagate to the caller.
  """
  path = "cache/" + cname
  # Pickle data is bytes: it has to be written through a binary-mode handle.
  # The `with` block closes the file, so no explicit close() is needed.
  with open(path, 'wb') as f:
    pickle.dump(value, f)
  return value

def sleep_for(time_to_sleep, sleep_interval=15):
  """Sleep for `time_to_sleep` seconds while drawing a text progress bar.

  A row of "_" (one per sleep interval) is printed first; then one "=" is
  written underneath after each interval has elapsed, followed by a final
  newline. Does nothing at all when `time_to_sleep` is zero or negative.

  time_to_sleep  -- total number of seconds to wait.
  sleep_interval -- seconds represented by each progress tick (default 15).

  Raises ValueError if `sleep_interval` is not positive.
  """
  # Imported locally because the script's import header does not bring in math.
  import math

  if sleep_interval <= 0:
    raise ValueError("sleep_interval must be positive")
  if time_to_sleep <= 0:
    return

  # A true ceiling keeps the "_" header the same length as the "=" row even
  # for fractional waits; float() avoids Python 2 integer division.
  ticks = int(math.ceil(time_to_sleep / float(sleep_interval)))
  print("_" * ticks)
  sys.stdout.flush()

  remaining = time_to_sleep
  while remaining > 0:
    # The final tick may be shorter than a full interval.
    time.sleep(min(remaining, sleep_interval))
    sys.stdout.write("=")
    sys.stdout.flush()
    remaining -= sleep_interval
  print("")


def followers(uid):
  """Return the follower IDs of `uid`, preferring the local cache.

  The cache entry is named "<uid>_followers". On a cache miss the function
  waits for the time reported by `api.GetSleepTime('followers/ids')` (padded
  by 30 seconds whenever a wait is required), requests up to 100 follower
  IDs, and stores the result in the cache before returning it.

  If the request fails with the Twitter error "Not authorized." an empty
  list is cached and returned so the account is not retried on later runs.
  Any other `twitter.TwitterError` is re-raised.
  """
  cache_key = str(uid) + "_followers"
  try:
    return read_cache(cache_key)
  except Exception:
    # Any failure to read the entry (missing or unreadable) is treated as a
    # cache miss. `Exception` rather than a bare `except:` so that Ctrl-C
    # and SystemExit are not swallowed. The fetch happens below, outside the
    # handler, so later errors are not chained to the cache miss.
    pass

  sleep_time = api.GetSleepTime('followers/ids')
  if sleep_time > 0:
    sleep_time += 30
  print("Need to fetch followers of "+str(uid)+"; sleeping for "+str(sleep_time))
  sleep_for(sleep_time)
  try:
    return write_cache(
      cache_key,
      api.GetFollowerIDs(uid, count=100, total_count=100)
    )
  except twitter.TwitterError as e:
    print("Caught: " + str(e))
    if str(e) == "Not authorized.":
      return write_cache(cache_key, [])
    # Bare raise keeps the original traceback.
    raise

#def friends(uid):
# try:
# ret = read_cache(str(uid)+"_friends");
# print "friends of user " + str(uid) + " are cached";
# return ret;
# except:
# sleep_time = api.GetSleepTime('friends/ids')
# print "Need to fetch friends of "+str(uid)+"; sleeping for "+str(sleep_time)
# time.sleep(sleep_time);
# return write_cache(
# str(uid)+"_friends", 
# api.GetFriendIDs(uid, count = 200)
# )

def connected(uid):
  """Return the set of user IDs connected to `uid`.

  Only followers are counted at present; the friends half of the union is
  commented out.
  """
  neighbours = set(followers(uid))  # | set(friends(uid))
  return neighbours

# Breadth-first crawl of the follower graph, starting from `me` and stopping
# once 300 distinct users have been visited (or the queue runs dry).
me = 45606271
completed = set()   # user IDs whose followers have already been fetched
todo = [me]         # FIFO queue of user IDs still to visit (may hold repeats)

while todo and len(completed) < 300:
  # Named `current` rather than `next` so the builtin is not shadowed.
  current = todo.pop(0)
  if current in completed:
    continue
  # One lookup per user: the result is queued directly instead of being
  # fetched a second time through connected().
  todo.extend(followers(current))
  completed.add(current)

print("Completed: " + str(completed))

# Build the distance table: users[a][b] == 1 whenever one of a, b follows the
# other and both were visited by the crawl. Edges are stored in both
# directions.
users = {}
for u in completed:
  u_followers = list(completed & set(followers(u)))
  users.setdefault(u, {})
  for f in u_followers:
    users[u][f] = 1
    users.setdefault(f, {})[u] = 1

# Repeatedly shorten paths: for every known source -> mid and mid -> dest,
# record source -> dest if it is new or shorter than what is stored. Stops
# when a full round changes nothing, or after 9 rounds at most.
depth = 1
made_a_change = True
while made_a_change and depth < 10:
  made_a_change = False
  print("Running round " + str(depth))
  depth = depth + 1
  for source in completed:
    # Iterate over snapshots: users[source] gains entries inside the loop,
    # and a live dict view must not change size during iteration.
    for mid in list(users[source]):
      for dest in list(users[mid]):
        if dest == source:
          # A round trip back to the start is not a path to another user;
          # storing it would add a bogus self-distance of 2 to every average.
          continue
        new_path_length = int(users[source][mid]) + int(users[mid][dest])
        if dest not in users[source] or users[source][dest] > new_path_length:
          print("Better path (" + str(new_path_length) + ") from " + str(source) + " to " + str(dest) + " through " + str(mid))
          users[source][dest] = new_path_length
          made_a_change = True

# Report: dump the distance table, then find the user (other than `me`) with
# the smallest average distance to the users they can reach.
print(users)
min_dist = float('inf')  # no upper bound on what counts as the current best
min_user = None

for u, distances in users.items():
  if not distances:
    # No known path to anyone: an average is undefined, so skip this user
    # instead of dividing by zero.
    continue
  tot = sum(distances.values())
  avg = float(tot) / float(len(distances))
  print("Average distance for " + str(u) + " : " + str(avg))
  if avg < min_dist and int(u) != int(me):
    min_dist = avg
    min_user = u

if min_user is None:
  # Nothing to look up; calling the API with user_id=None would just fail.
  print("No user other than " + str(me) + " has any known connections")
else:
  print("The most central user is "+str(min_user)+" with an average bacon-distance of "+str(min_dist))
  user = api.GetUser(user_id=min_user)
  print("The user's name is "+user.name + " a.k.a. @"+user.screen_name)