#!/usr/bin/python
# Minas Gjoka
"""Compare ranked install counts from a crawl with Facebook's own figures.

Usage: <script> rank_crawled rank_analytics

Both input files contain whitespace-separated ``rank count`` integer pairs,
one pair per line. Lines whose first non-blank character is ``#`` and blank
lines are ignored. The script writes ``compare-sample-dataset.dat`` (one row
per rank present in both inputs) and asks gnuplot to render
``compare-sample-dataset.eps`` from it on log-log axes.
"""

import cookielib
import csv
import datetime
import glob
import gzip
import httplib
import os
import random
import re
import signal
import sys
import time
import urllib
import urllib2

import Gnuplot
import Gnuplot.funcutils

DATA_FILE = "compare-sample-dataset.dat"
PLOT_FILE = "compare-sample-dataset.eps"


def _read_rank_counts(path):
    """Parse a ``rank count`` file into ``({rank: count}, total_count)``.

    Raises IndexError if a data line has fewer than two fields and
    ValueError if either of the first two fields is not an integer.
    """
    counts = {}
    total = 0
    # `with` guarantees the handle is closed even if parsing raises.
    with open(path, "r") as handle:
        for line in handle:
            stripped = line.strip()
            # Skip comments, and blank lines (which would otherwise fail
            # on fields[0] below).
            if not stripped or stripped.startswith("#"):
                continue
            fields = stripped.split()
            count = int(fields[1])
            # A repeated rank overwrites the stored count, but every line
            # still contributes to the total.
            counts[int(fields[0])] = count
            total += count
    return counts, total


def _ratio(count, total):
    """Return count/total as a float, or 0.0 when total is zero."""
    return count / float(total) if total else 0.0


def _write_comparison(path, facebook_given, sum_facebook_given,
                      crawled, sum_crawled):
    """Write one ``rank facebook crawled ratio_fb ratio_crawled`` row per rank.

    Only ranks present in both mappings are written, in ascending order.
    When both inputs use contiguous ranks starting at 1 this is exactly
    ranks 1..min(len(facebook_given), len(crawled)).
    """
    shared_ranks = sorted(set(facebook_given) & set(crawled))
    with open(path, "w") as out:
        out.write("#Rank Facebook Crawled RatioFacebook(%d) RatioCrawled(%d)\n"
                  % (sum_facebook_given, sum_crawled))
        for rank in shared_ranks:
            out.write("%d %d %d %f %f\n" % (
                rank,
                facebook_given[rank],
                crawled[rank],
                _ratio(facebook_given[rank], sum_facebook_given),
                _ratio(crawled[rank], sum_crawled),
            ))


def _plot_comparison():
    """Send the gnuplot commands that render DATA_FILE into PLOT_FILE."""
    g = Gnuplot.Gnuplot(debug=0)
    g('set grid')
    g('set pointsize 1.5')
    g('set logscale x')
    g('set logscale y')
    g.xlabel('Ranked Applications\\n')
    g.ylabel('Ratio of Installs over all application installs')
    g('set terminal postscript monochrome enhanced "Arial" 20')
    g('set output "%s"' % PLOT_FILE)
    # The awk program prints every input line unchanged; its `i` counter and
    # `period` variable are set but never used to filter rows.
    data_source = ('"< cat %s | awk '
                   '\'BEGIN{i=0; period=2} { if (NR>0) { i=i+1 } ; print }\'"'
                   % DATA_FILE)
    g('plot ' + data_source
      + ' using 1:4 with lines lt 4 lw 5 title "Facebook Statistics", '
      + data_source
      + ' using 1:5 with lines lt 19 lw 5 title "Crawled Dataset"')


def main():
    """Read both rank files, write the comparison table, and plot it."""
    try:
        name_crawled = sys.argv[1]
        name_analytics = sys.argv[2]
    except IndexError:
        # Only a missing argument is a usage error; anything else propagates.
        print("Usage: %s rank_crawled rank_analytics" % sys.argv[0])
        sys.exit(1)

    crawled, sum_crawled = _read_rank_counts(name_crawled)
    facebook_given, sum_facebook_given = _read_rank_counts(name_analytics)

    _write_comparison(DATA_FILE, facebook_given, sum_facebook_given,
                      crawled, sum_crawled)
    _plot_comparison()


if __name__ == "__main__":
    main()