__author__ = 'Luchen'
import matplotlib.pyplot as pyplot
from matplotlib.backends.backend_pdf import PdfPages

def _decile_values(bucket):
    """Return the 11 decile values of ``bucket``, ordered lowest to highest.

    Entry ``k`` is the element at 1-based rank ``k * len(bucket) // 10``
    (clamped to at least 1) of the sorted bucket, so entry 0 is the minimum
    and entry 10 is the maximum.  ``bucket`` must be non-empty.
    """
    ordered = sorted(bucket)
    size = len(ordered)
    # Floor division keeps the index an int on Python 3 as well as Python 2.
    return [ordered[max(k * size // 10, 1) - 1] for k in range(11)]


def _bucket_deciles(scores, bucket_size):
    """Group ``scores`` into consecutive buckets and collect their deciles.

    Returns ``(x, deciles)`` where ``x`` holds the 1-based bucket numbers and
    ``deciles[k]`` holds the k-th decile value of every bucket, in order.
    A trailing bucket with fewer than ``bucket_size`` scores is included.
    """
    x = []
    deciles = [[] for _ in range(11)]

    def record(bucket):
        # Bucket n is plotted at x == n, including the trailing partial one.
        x.append(len(x) + 1)
        for series, value in zip(deciles, _decile_values(bucket)):
            series.append(value)

    bucket = []
    for score in scores:
        bucket.append(score)
        if len(bucket) == bucket_size:
            record(bucket)
            bucket = []
    if bucket:
        record(bucket)
    return x, deciles


def plotting(data_index, ylabel, fig_name, data_path='../data/rbo_w2t_t2w'):
    """Plot per-bucket decile curves of one score column and save them as PDF.

    The input is read line by line; each line is split on tabs and column
    ``data_index`` is parsed as a float (1: rbo, 2: w2t, 3: t2w).  Lines are
    grouped into consecutive buckets of 1000, and for every bucket the
    lowest value, the 10%..90% ranks and the highest value are plotted as
    eleven curves against the bucket number.
    NOTE(review): the x-axis label suggests the lines are ordered by Twitter
    frequency rank -- confirm against the data file.

    Args:
        data_index: zero-based index of the tab-separated column to plot.
        ylabel: label for the y axis.
        fig_name: output file name without the ``.pdf`` extension.
        data_path: path of the tab-separated input file.

    Raises:
        ValueError: if the input file contains no lines (raised directly),
            or if a field cannot be parsed by ``float``.
        IndexError: if a line has no column ``data_index``.
    """
    bucket_size = 1000
    with open(data_path) as fp:
        # Generator is consumed inside the ``with`` so the file is still open.
        scores = (float(line.strip().split('\t')[data_index]) for line in fp)
        x, deciles = _bucket_deciles(scores, bucket_size)
    if not x:
        raise ValueError('no scores found in ' + data_path)

    plot_handles = []
    for k, series in enumerate(deciles):
        curve, = pyplot.plot(x, series, label='Line' + str(k + 1))
        plot_handles.append(curve)
    # Curves were drawn lowest-first; the legend lists them highest-first.
    plot_handles.reverse()

    pyplot.xlabel('Twitter Frequency Rank (* 1000)', fontsize=20)
    pyplot.ylabel(ylabel, fontsize=20)
    pyplot.axis([0, 510, -1, 1])
    pyplot.legend(plot_handles,
                  ['Highest', 'Top 10%', 'Top 20%', 'Top 30%', 'Top 40%',
                   'Median', 'Top 60%', 'Top 70%', 'Top 80%', 'Top 90%',
                   'Lowest'],
                  loc=3, fontsize='small', ncol=3)

    pp = PdfPages(fig_name + '.pdf')
    try:
        pp.savefig(bbox_inches='tight')
    finally:
        # Always release the figure and the PDF handle, even if saving fails.
        pyplot.close()
        pp.close()


if __name__ == '__main__':
    # One figure per score column: (column index, y-axis label, output name).
    figures = (
        (1, 'RBO value', 'RBO_TwitterDF_percentage'),
        (2, 'Wikipedia to Twitter Cosine Similarity', 'w2t_TwitterDF_percentage'),
        (3, 'Twitter to Wikipedia Cosine Similarity', 't2w_TwitterDF_percentage'),
    )
    for column, axis_label, output_name in figures:
        plotting(column, axis_label, output_name)