#!/usr/bin/python
# Minas Gjoka
"""Collect friend UIDs and friend profile pages for a list of root UIDs.

Usage: <script> uidfilename networkid email password

The script logs in to facebook.com through a mechanize ``Browser``, then for
every root UID listed in ``uidfilename`` (visited in random order):

* downloads the friends page for that UID with the ``nk`` query parameter set
  to ``networkid``;
* appends every UID matched by ``fof_RegExp`` to ``<uidfilename>-friends``;
* for each matched UID that is neither a root UID nor an already known friend,
  downloads its profile page and stores it as ``profile-<uid>.gz``.

``<uidfilename>-friends`` must already exist; it is read first and then
reopened in append mode.

NOTE(review): the password is read from the command line, so it is visible in
the process list and shell history.
"""

import cookielib
import datetime
import gzip
import httplib
import random
import re
import signal
import sys
import time
import urllib
import urllib2

from mechanize import Browser


def sleeprandom(minsecs, maxsecs):
    """Sleep for a random duration between ``minsecs`` and ``maxsecs`` seconds.

    The chosen duration is printed before sleeping.
    """
    # Scale by the width of the interval so that maxsecs really is the upper
    # bound; every call in this script passes minsecs=0, for which the result
    # is the same as the previous "minsecs + random() * maxsecs".
    num = minsecs + random.random() * (maxsecs - minsecs)
    print('Sleeping for %f' % (num))
    time.sleep(num)


def _load_uids(path):
    """Return a dict mapping each integer UID in ``path`` (one per line) to 0.

    Blank lines are skipped. Raises IOError if the file cannot be opened and
    ValueError if a non-blank line is not an integer.
    """
    uids = {}
    fin = open(path, "r")
    try:
        for line in fin:
            if line.strip():
                uids[int(line)] = 0
    finally:
        fin.close()
    return uids


def _fetch_with_retry(browser, url, max_wait, error_message, backoff=1):
    """Download ``url`` with ``browser``, retrying until it succeeds.

    Before every attempt the function sleeps for a random time of up to
    ``max_wait`` seconds. After a failed attempt ``error_message`` is printed
    and ``max_wait`` is multiplied by ``backoff``. Returns the page body.
    """
    while True:
        try:
            sleeprandom(0, max_wait)
            return browser.open(url).read()
        except Exception:
            # Deliberately not a bare "except:": KeyboardInterrupt and
            # SystemExit must escape, otherwise this loop cannot be stopped.
            print(error_message)
            max_wait = max_wait * backoff


try:
    filename = sys.argv[1]
    netid = int(sys.argv[2])
    email = sys.argv[3]
    password = sys.argv[4]
except (IndexError, ValueError):
    # Too few arguments, or a network id that is not an integer.
    print('Usage: %s uidfilename networkid email password' % sys.argv[0])
    sys.exit(1)

USER_AGENT = (
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.1) "
    "Gecko/20071106 Iceweasel/2.0.0.5 (Debian-2.0.0.1+dfsg-1)"
)

# Log in once; the same browser object is reused for every later request.
br = Browser()
br.addheaders = [("User-agent", USER_AGENT)]
br.open("https://www.facebook.com")
br.select_form("loginform")
br['email'] = email
br['pass'] = password
br['persistent'] = ["1"]
response = br.submit()

# Root UIDs whose friends pages will be downloaded.
hash_uids = _load_uids(filename)
print("%d root uids found in file %s\n" % (len(hash_uids), filename))

##### read and set to append mode the list of FOFs
friend_filename = "%s-friends" % (filename)
friends_hash_uids = _load_uids(friend_filename)
print("%d friend uids found in file %s" % (len(friends_hash_uids), friend_filename))
f = open(friend_filename, "a")
#####

# NOTE(review): the pattern is empty in this copy of the file, but the loop
# below calls match.group(1), which raises IndexError when the pattern has no
# capturing group. The intended pattern (presumably one that captures a
# numeric UID from the friends page) must be restored before running.
fof_RegExp = re.compile(r"")

counter = 0
k = list(hash_uids.keys())
random.shuffle(k)

try:
    for uid in k:
        samenet_friends = "http://www.facebook.com/friends/?id=%d&nk=%d" % (uid, netid)
        br.clear_history()

        # Original author's note on this request: "random selection of up to
        # 10 people each time".
        friends_html = _fetch_with_retry(
            br,
            samenet_friends,
            1.5,
            "Error encountered: retrying downloading friends list",
        )

        counter = counter + 1
        n_openprofiles = len(fof_RegExp.findall(friends_html))
        print(
            "Random retrieval successful: %d unique friend UIDs in iteration %d "
            "\t\t %d open profiles found to process next"
            % (len(friends_hash_uids), counter, n_openprofiles)
        )

        for match in fof_RegExp.finditer(friends_html):
            f_uid = int(match.group(1))
            # Every matched UID is appended, including ones seen before;
            # duplicates collapse when the file is loaded into a dict.
            f.write('%d\n' % f_uid)
            f.flush()

            if (f_uid not in friends_hash_uids) and (f_uid not in hash_uids):
                url_profile = "http://www.facebook.com/profile.php?id=%d" % f_uid
                # The wait ceiling starts at 0.25s and doubles after every
                # failed attempt.
                profile_html = _fetch_with_retry(
                    br,
                    url_profile,
                    0.25,
                    "Error encountered: retrying",
                    backoff=2,
                )
                print("Friend Profile %d retrieved" % f_uid)

                f_profile = gzip.open("profile-%d.gz" % f_uid, "w")
                try:
                    f_profile.write("%s\n" % profile_html)
                finally:
                    f_profile.close()
                friends_hash_uids[f_uid] = 1
finally:
    # Close the append handle even when the crawl is interrupted or fails.
    f.close()