#!/usr/bin/python # Minas Gjoka import urllib,re, sys, time, random, datetime, time, csv, os from bisect import bisect #from scipy import * try: filename = sys.argv[1] except: print 'Usage:', sys.argv[0], 'filename' sys.exit(1) fin = open(filename,"r") comment_RegExp = re.compile('^#') hashlist_apps = {} for line in fin: if not comment_RegExp.search(line.strip()): fields=line.split() hashlist_apps[ int(fields[0]) ] = int(fields[1]) #numofusers = 67*10**6 scalefactor= 1 numofusers = 289880/scalefactor list_coloredballs = hashlist_apps.values() list_allusers = numofusers*[0] alpha= 1.6 zero_bin_init = 5 nusers_covered = {} users_withoutapps = set() for i in range(numofusers): users_withoutapps.add(i) apps_uid = {} for ball_color in xrange(len(list_coloredballs)): numofballs = list_coloredballs[ball_color]/scalefactor print "\nBall color %d with %d balls " % (ball_color, numofballs) #userlist_random = random.sample(xrange(0,numofusers),min(numofballs,numofusers-1)) if numofballs == 0: break #list_users_nocolorapp = range(0,numofusers) p_users_nocolorapp = (numofusers-1)*[0] s = 0 for i in list_allusers: s = s + pow(i,alpha) sum_denominator = float(s+ zero_bin_init*numofusers) p_users_nocolorapp[0] = (pow(list_allusers[ 0 ],alpha)+zero_bin_init)/ sum_denominator # first bin initialized for i in xrange(1,len(p_users_nocolorapp)): p_users_nocolorapp[i] = (pow(list_allusers[i],alpha)+zero_bin_init)/sum_denominator + p_users_nocolorapp[i-1] #print p_users_nocolorapp unique_balls = set() while len(unique_balls)