#!/usr/bin/python # Minas Gjoka import urllib,re, sys, time, random, datetime, time, signal, urllib2, cookielib, httplib, gzip, glob, os, csv import Gnuplot, Gnuplot.funcutils ########################################################### g = Gnuplot.Gnuplot(debug=0) g('set grid') g('set pointsize 2') g.xlabel('Day') g.ylabel('Number of applications') g('set terminal postscript monochrome enhanced "Arial" 22') g('set output "nofapps.eps"') g('plot "nofapps.dat" using ($1-66):2 with points title "Number of apps"') #--------------- g.ylabel('Active Users over all applications') g('set output "dau.eps"') g('plot "dau.dat" using ($1-66):2 with linespoints lw 2 title "Accumulated DAU"') #--------------- g.ylabel('Total Installs over all applications') g('set output "totalinstalls.eps"') g('plot "totalinstalls.dat" using ($1-66):2 with points title "Accumulated Total Installs"') #----------------- g.ylabel('Ratio') g('set terminal postscript monochrome enhanced "Arial" 21') g('set output "dauovertotalinstalls.eps"') g('plot "dauovertotalinstalls.dat" using ($1-66):2 with linespoints lw 2 title "Ratio of Accumulated DAU over Total Installs"') #---combination-- napps + total installs- g = Gnuplot.Gnuplot(debug=0) g('set grid') g('set pointsize 2') g('set xlabel "Day"') g('set ylabel "Number of applications"') g('set y2label "Accumulated total installs"') g('set key bottom right') g('set y2tics') g('set terminal postscript monochrome enhanced "Arial" 22') g('set output "nofapps_n_totalinstalls.eps"') g('plot "nofapps.dat" every 2 using ($1-66):2 with points axis x1y1 lw 1 pt 1 title "Number of apps",\ "totalinstalls.dat" every 2 using ($1-66):2 with points axis x1y2 lw 1 pt 7 title "Accumulated Total Installs"') ##########Power law - Exponent in time########################### g = Gnuplot.Gnuplot(debug=0) g('set grid') g('set pointsize 2') g.xlabel('Day') g.ylabel('Gamma') g('set terminal postscript color enhanced "Arial" 20') g('set output "dau-slope-powerlaw.eps"') g('plot [][0:2]"dau-slope-powerlaw.dat" using 1:(-$2+1) with points title "Slope for DAU popularity"') g = Gnuplot.Gnuplot(debug=0) g('set grid') g('set pointsize 2') g.xlabel('Day') g.ylabel('Gamma') g('set terminal postscript color enhanced "Arial" 20') g('set output "totalinstalls-slope-powerlaw.eps"') g('plot [][0:2]"totalinstalls-slope-powerlaw.dat" using 1:(-$2+1) with points title "Slope for Total Installs"') ##########Periodicity################ g = Gnuplot.Gnuplot(debug=0) g('set terminal postscript color enhanced "Arial" 20') g('set output "periodicity.eps"') g('plot "periodicity.dat" using 1:2 with linespoints title "Power"') ############Average Activity############## g = Gnuplot.Gnuplot(debug=0) g('set grid') g('set pointsize 2') g.xlabel('Daily Activity') g.ylabel('Value') g('set terminal postscript color enhanced "Arial" 20') g('set output "average-activity.eps"') g('plot "average-activity.dat" using 1:2 with linespoints title "Activity"') ###########Ranking and CCDF of DAU################ g = Gnuplot.Gnuplot(debug=0) g('set grid') g('set pointsize 2') g('set logscale x') g('set logscale y') g.xlabel('Daily Active Users') g.ylabel('CCDF') g('set terminal postscript color enhanced "Arial" 20') g('set output "ccdf-dau-lastday.eps"') g('plot "ccdf-dau-lastday.dat" using 1:2 with linespoints title "Active Users"') #---------------- g('set xlabel "Rank"') g('set ylabel "Active Users"') g('set output "rank-dau-lastday.eps"') g('plot "rank-dau-lastday.dat" using 1:2 with linespoints title "Active Users"') ###########Ranking and CCDF of Total installs################ g = Gnuplot.Gnuplot(debug=0) g('set grid') g('set pointsize 2') g('set logscale x') g('set logscale y') g.xlabel('Total Installs (one day)') g.ylabel('CCDF') g('set terminal postscript color enhanced "Arial" 20') g('set output "ccdf-totalinstalls-lastday.eps"') g('plot "ccdf-totalinstalls-lastday.dat" using 1:2 with linespoints title "Total Installs"') #------------ g('set xlabel "Rank"') g('set ylabel "Total Installs"') g('set output "rank-totalinstalls-lastday.eps"') g('plot "rank-totalinstalls-lastday.dat" using 1:2 with linespoints title "Total Installs"') ############Combine CDF of Total Installs and DAU############## g = Gnuplot.Gnuplot(debug=0) g('set grid') g('set pointsize 2') g('set logscale x') g('set logscale y') g.xlabel('Value') g.ylabel('CCDF') g('set terminal postscript monochrome enhanced "Arial" 22') g('set output "ccdf-combined-popul-lastday.eps"') g('plot "ccdf-dau-lastday.dat" using 1:2 with points title "Active Users" ,\ "ccdf-totalinstalls-lastday.dat" using 1:2 with points title "Total Installs') ##########Ranking and CCDF of Percentage of active users################# g('set grid') g('set pointsize 2') g('set logscale x') g('set logscale y') g.xlabel('Ranking') g.ylabel('Activity as a percentage of Total Installs') g('set terminal postscript color enhanced "Arial" 20') g('set output "rank-perc-lastday.eps"') g('plot "rank-perc-lastday.dat" using 1:2 with linespoints title "Activity Percentage"') ###########################3 point_types = [7,4,3,13, 1,2 ,6] def plotdata(data_set,postfix): array = [app for app in data_set] s = 'plot ' for i in range(len(array)): appname = array[i] appid = apps[appname] s = '%s "popular-%s" every 2 using ($1-66):2 with linespoints lw 2 pt %d title "%s", ' % (s,appid,point_types[i],appname) s = s[0:len(s)-2] g = Gnuplot.Gnuplot(debug=0) g('set grid') #g('set logscale x') #g('set logscale y') g('set pointsize 1.5') g('set key on') g('set key top right') g.xlabel('Day') g.ylabel('Daily Active users') g('set terminal postscript monochrome enhanced "Arial" 20') g('set output "mostpopular-apps_%s.eps"' % postfix) g('%s' % s) fset = glob.glob('popular-*') print "Number of popular apps:" + str(len(fset)) highestdau_appid = {} for name in fset: namefields = os.path.basename(name).split("-") appid = int(namefields[1]) fin = open(name,"r") highestdau_appid[ appid ] = 0 for line in fin: fields = line.split() if highestdau_appid[appid]