__author__ = 'luchen'
import operator
import matplotlib.pyplot as pyplot
import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
# Absolute path of a w2t distance-to-median score file.  None of the functions
# visible in this file reference it; they build their own '../data/...' paths.
data_path = '/home/luchen/PycharmProjects/word2vec/data/w2t_distance_to_median'
# Corpus dictionary files handed to read_in_corpus_dict(), which parses them
# as tab-separated lines.
wiki_dict_path = '/home/luchen/PycharmProjects/word2vec/data/wiki_dict_sortbytf'
tweet_dict_path = '/home/luchen/PycharmProjects/word2vec/data/tweet_dict_sortbydf'


def read_in_corpus_dict(path, top_num=None):
    """Load a tab-separated corpus dictionary file into a dict.

    Every line is stripped and split on tabs; the second field becomes the
    key and the first field its value (both are kept as strings).  When the
    same key occurs more than once, the last occurrence read wins.

    :param path: path of the file to read.
    :param top_num: integer line limit.  When truthy, reading stops after the
        first ``top_num`` lines; ``None`` (or ``0``) reads the whole file.
    :return: dict mapping the second field of each line to its first field.
    :raises IndexError: if a line has fewer than two tab-separated fields.
    """
    dictionary = dict()
    # The context manager closes the handle even when we break out early or a
    # malformed line raises.
    with open(path) as source:
        for line_number, line in enumerate(source, 1):
            fields = line.strip().split('\t')
            dictionary[fields[1]] = fields[0]
            if top_num and line_number >= top_num:
                break
    return dictionary


def read_in_score(path, base_dict, type, top_num, term_index=0, score_index=1, split_tag='\t'):
    """Read per-term scores from a delimited file, keeping only known terms.

    Each line is stripped and split on ``split_tag``.  The score column is
    converted with ``float`` for every line, including lines whose term is
    later discarded.  Only terms that are keys of ``base_dict`` are kept; a
    term appearing on several lines keeps the score of its last line.

    Side effect: the kept ``term<TAB>score`` pairs are written, sorted by
    ascending score and without a trailing newline, to the file
    ``path + '_top' + str(top_num) + '_' + type``.  The output name does not
    depend on ``score_index``, so calls that differ only in ``score_index``
    overwrite each other's output file.

    :param path: score file to read; also the prefix of the output file name.
    :param base_dict: container whose keys are the terms to keep.
    :param type: label used as the output file-name suffix.  The name shadows
        the builtin but is kept so existing keyword callers keep working.
    :param top_num: only used to build the output file name.
    :param term_index: column index of the term.
    :param score_index: column index of the score.
    :param split_tag: column separator.
    :return: dict mapping each kept term to its float score.
    :raises IndexError: if a line has too few columns.
    :raises ValueError: if a score column is not a valid float.
    """
    dictionary = dict()
    with open(path) as source:
        for line in source:
            fields = line.strip().split(split_tag)
            term = fields[term_index]
            score = float(fields[score_index])
            if term in base_dict:
                dictionary[term] = score
    ranked = sorted(dictionary.items(), key=operator.itemgetter(1))
    with open(path + '_top' + str(top_num) + '_' + type, 'w') as out:
        out.write('\n'.join(term + '\t' + str(score) for term, score in ranked))
    return dictionary


def combine_dict_value(x_dict, y_dict):
    """Pair up the values of two dicts over the keys they share.

    ``x_dict`` is walked in its own iteration order.  For every key that is
    also in ``y_dict`` the ``x_dict`` value is appended to the first list and
    the ``y_dict`` value to the second; keys missing from ``y_dict`` are
    skipped.

    Side effect: a line ``key  x: value  y: score`` is printed for every
    shared key whose two values are both >= 0.25, or whose x value is <= 0.01
    while its y value is <= 0.1.

    :param x_dict: dict providing the x values.
    :param y_dict: dict providing the y values.
    :return: tuple ``(x, y)`` of two equally long lists.
    """
    x = []
    y = []
    # .items() and a pre-formatted print() behave the same on Python 2 and 3;
    # the original print statement / iteritems() only worked on Python 2.
    for key, value in x_dict.items():
        if key not in y_dict:
            continue
        score = y_dict[key]
        x.append(value)
        y.append(score)
        x_value = float(value)
        # NOTE(review): the "low" thresholds are asymmetric (0.01 for x but
        # 0.1 for y) -- confirm this is intended and not a typo.
        if (x_value >= 0.25 and float(score) >= 0.25) or \
                (x_value <= 0.01 and float(score) <= 0.1):
            print('%s  x: %s  y: %s' % (key, value, score))
    return x, y

def combine_dict_value_by_fix_num(x_dict, y_dict, bucket_size=30):
    """Average shared-key (x, y) pairs in consecutive fixed-size buckets.

    The (x, y) pairs of all keys present in both dicts are sorted by x and cut
    into consecutive buckets of ``bucket_size`` pairs; the last bucket holds
    whatever is left.  One point, the mean x and mean y of a bucket, is
    emitted per bucket.

    Side effect: the size of the last bucket is printed.

    :param x_dict: dict providing the x values.
    :param y_dict: dict providing the y values.
    :param bucket_size: number of pairs per bucket.
    :return: tuple ``(x, y)`` of per-bucket means; two empty lists when the
        dicts share no key (the original raised ZeroDivisionError there).
    """
    sorted_pair = sorted(
        ((value, y_dict[key]) for key, value in x_dict.items() if key in y_dict),
        key=operator.itemgetter(0))
    x = []
    y = []
    if not sorted_pair:
        return x, y

    def flush(bucket):
        # float() keeps this a true mean on Python 2 even for integer values.
        size = float(len(bucket))
        x.append(sum(p[0] for p in bucket) / size)
        y.append(sum(p[1] for p in bucket) / size)

    bucket = []
    for index, item in enumerate(sorted_pair):
        if index > 0 and index % bucket_size == 0:
            flush(bucket)
            bucket = [item]
        else:
            bucket.append(item)
    print(len(bucket))
    flush(bucket)
    return x, y


def combine_dict_value_by_fix_num_window(x_dict, y_dict, bucket_size=100, step=10):
    """Average shared-key (x, y) pairs over a sliding window of pairs.

    The (x, y) pairs of all keys present in both dicts are sorted by x.  A
    window covering the index range ``[start, end)``, initially
    ``[0, bucket_size)``, is filled pair by pair.  When the next pair falls
    outside the window, the mean x and mean y of the window are emitted, the
    window moves forward by ``step`` indices (dropping its first ``step``
    pairs) and the new pair is added.  After the last pair, whatever the
    window currently holds is emitted as a final point, so that last point
    may average fewer than ``bucket_size`` pairs.

    Side effect: progress lines (window size, start, end, index) are printed.

    :param x_dict: dict providing the x values.
    :param y_dict: dict providing the y values.
    :param bucket_size: number of pairs in a full window.
    :param step: number of pairs the window advances at a time.
    :return: tuple ``(x, y)`` of per-window means; two empty lists when the
        dicts share no key (the original raised ZeroDivisionError there).
    """
    sorted_pair = sorted(
        ((value, y_dict[key]) for key, value in x_dict.items() if key in y_dict),
        key=operator.itemgetter(0))
    x = []
    y = []
    if not sorted_pair:
        return x, y

    def flush(bucket):
        # float() keeps this a true mean on Python 2 even for integer values.
        size = float(len(bucket))
        x.append(sum(p[0] for p in bucket) / size)
        y.append(sum(p[1] for p in bucket) / size)

    bucket = []
    start = 0
    end = bucket_size
    for index, item in enumerate(sorted_pair):
        if start <= index < end:
            bucket.append(item)
            continue
        print('%s %s %s %s' % (len(bucket), start, end, index))
        flush(bucket)
        start += step
        end += step
        # Drop the pairs that slid out of the window, then take in the pair
        # that triggered the move.
        bucket = bucket[step:]
        print(len(bucket))
        bucket.append(item)
    print(len(bucket))
    flush(bucket)
    return x, y


def combine_dict_value_by_window_average(x_dict, y_dict, window_size=0.05, step=0.01):
    """Average shared-key (x, y) pairs over a window sliding along x values.

    The (x, y) pairs of all keys present in both dicts are sorted by x.  The
    window starts as ``[min_x, min_x + window_size)``.  Pairs are collected
    while their x lies inside the window.  A pair that falls outside is still
    added to the collection; then one point is emitted (x = window centre,
    y = mean y of the collection), the window is moved right by ``step`` and
    pairs whose x is below the new lower bound are dropped.  Finally the pair
    with the largest x is appended unchanged as the last point.

    NOTE(review): the pair that triggers an emit lies outside the window yet
    is included in that window's mean, and the window moves only one ``step``
    per triggering pair regardless of how far ahead that pair is.  This
    reproduces the original behaviour -- confirm whether it is intended.

    :param x_dict: dict providing the x values.
    :param y_dict: dict providing the y values.
    :param window_size: width of the window in x units.
    :param step: distance the window moves at a time, in x units.
    :return: tuple ``(x, y)`` of equally long lists; two empty lists when the
        dicts share no key (the original raised IndexError there).
    """
    sorted_pair = sorted(
        ((value, y_dict[key]) for key, value in x_dict.items() if key in y_dict),
        key=operator.itemgetter(0))
    x = []
    y = []
    if not sorted_pair:
        return x, y
    min_point = float(sorted_pair[0][0])
    max_point = min_point + window_size
    window = []
    for item in sorted_pair:
        window.append(item)
        if min_point <= item[0] < max_point:
            continue
        x.append((min_point + max_point) / 2)
        y.append(sum(point[1] for point in window) / float(len(window)))
        min_point += step
        max_point += step
        window = [point for point in window if point[0] >= min_point]
    x.append(sorted_pair[-1][0])
    y.append(sorted_pair[-1][1])
    return x, y


def plotting_average_rbo_distance(rbo_result_file, out_suffix):
    """Plot window-averaged RBO against distance / cosine scores.

    For the top 5000 terms of the Wikipedia dictionary and then of the Twitter
    dictionary, four score sets (w2t distance, t2w distance, and columns 2
    and 3 of the RBO file, labelled W2T / T2W cosine) are each averaged
    against the RBO score with combine_dict_value_by_fix_num_window() and
    drawn as a scatter plot.  The two Twitter distance plots are written as
    PDF, the other six as PNG, all into the current working directory.

    Every read_in_score() call also writes a sorted copy of what it read next
    to its input file.  The number of scores loaded and the name of each
    distance plot are printed as the function goes.

    :param rbo_result_file: name of the RBO result file inside ``../data/``.
    :param out_suffix: suffix appended to every output file name.
    """
    num = 5000
    rbo_path = '../data/' + rbo_result_file
    w2t_path = '../data/w2t_distance_to_median_withnegative'
    t2w_path = '../data/t2w_distance_to_median_withnegative'

    def load_scores(path, corpus_dict, corpus, label, **options):
        # Load one score set and report how many terms survived the filter.
        scores = read_in_score(path, corpus_dict, corpus, num, **options)
        print('%d %s' % (len(scores), label))
        return scores

    def draw(x_scores, y_scores, x_label, fontsize=None, tick_size=None):
        # Draw the averaged scatter plot on the current pyplot figure.
        xs, ys = combine_dict_value_by_fix_num_window(x_scores, y_scores)
        pyplot.plot(xs, ys, '.')
        if tick_size is not None:
            pyplot.tick_params(axis='x', size=tick_size, labelsize=tick_size)
            pyplot.tick_params(axis='y', size=tick_size, labelsize=tick_size)
        text_options = {} if fontsize is None else {'fontsize': fontsize}
        pyplot.xlabel(x_label, **text_options)
        pyplot.ylabel('Average RBO', **text_options)

    def save_png(name):
        # ``type`` is not a savefig keyword (newer matplotlib raises TypeError
        # on unknown keywords), so it is dropped.  The '.png' extension is
        # spelled out so the format does not rely on matplotlib's default.
        pyplot.savefig(name + '.png')
        pyplot.close()

    def save_pdf(name):
        # Close the PDF even if rendering fails.
        pdf = PdfPages(name + '.pdf')
        try:
            pdf.savefig(bbox_inches='tight')
        finally:
            pdf.close()
        pyplot.close()

    # ---- terms taken from the Wikipedia dictionary ----
    wiki_dict = read_in_corpus_dict(wiki_dict_path, top_num=num)
    rbo_wiki = load_scores(rbo_path, wiki_dict, 'wiki', 'RBO wiki')
    w2t_wiki = load_scores(w2t_path, wiki_dict, 'wiki', 'W2T wiki')
    t2w_wiki = load_scores(t2w_path, wiki_dict, 'wiki', 'T2W wiki')
    w2t_wiki_cs = load_scores(rbo_path, wiki_dict, 'wiki', 'W2T Cosine wiki', score_index=2)
    t2w_wiki_cs = load_scores(rbo_path, wiki_dict, 'wiki', 'T2W Cosine wiki', score_index=3)

    print('w2t rbo wiki')
    draw(w2t_wiki, rbo_wiki,
         'Average Wikipedia to Twitter adjusted distance(Top 5000 in Wikipedia)')
    save_png('w2t_rbo_wiki_' + out_suffix)

    print('t2w rbo wiki')
    draw(t2w_wiki, rbo_wiki,
         'Average Twitter to Wikipedia adjusted distance(Top 5000 in Wikipedia)')
    save_png('t2w_rbo_wiki_' + out_suffix)

    draw(w2t_wiki_cs, rbo_wiki,
         'Average Wikipedia to Twitter cosine similarity(Top 5000 in Wikipedia)')
    save_png('w2t_rbo_wiki_cosine_top' + out_suffix)

    draw(t2w_wiki_cs, rbo_wiki,
         'Average Twitter to Wikipedia cosine similarity(Top 5000 in Wikipedia)')
    save_png('t2w_rbo_wiki_cosine_top' + out_suffix)

    # ---- terms taken from the Twitter dictionary ----
    tweet_dict = read_in_corpus_dict(tweet_dict_path, top_num=num)
    rbo_tweet = load_scores(rbo_path, tweet_dict, 'tweet', 'RBO Twitter')
    w2t_tweet = load_scores(w2t_path, tweet_dict, 'tweet', 'W2T Twitter')
    t2w_tweet = load_scores(t2w_path, tweet_dict, 'tweet', 'T2W Twitter')
    w2t_tweet_cs = load_scores(rbo_path, tweet_dict, 'tweet', 'W2T Cosine Twitter', score_index=2)
    t2w_tweet_cs = load_scores(rbo_path, tweet_dict, 'tweet', 'T2W Cosine Twitter', score_index=3)

    print('w2t rbo tweet')
    draw(w2t_tweet, rbo_tweet,
         'Average W2T adjusted distance(Top 5000 in Twitter)',
         fontsize=20, tick_size=20)
    save_pdf('w2t_rbo_tweet_' + out_suffix)

    print('t2w rbo tweet')
    draw(t2w_tweet, rbo_tweet,
         'Average T2W adjusted distance(Top 5000 in Twitter)',
         fontsize=20, tick_size=20)
    save_pdf('t2w_rbo_tweet_' + out_suffix)

    draw(w2t_tweet_cs, rbo_tweet,
         'Average W2T cosine similarity(Top 5000 in Twitter)', fontsize=18)
    save_png('w2t_rbo_tweet_cosine_top' + out_suffix)

    draw(t2w_tweet_cs, rbo_tweet,
         'Average Twitter to Wikipedia cosine similarity(Top 5000 in Twitter)')
    save_png('t2w_rbo_tweet_cs_top' + out_suffix)

def plotting_no_distance_range(rbo_result_file, out_suffix):
    """Scatter-plot per-term RBO against w2t / t2w distance scores.

    For the top 5000 terms of the Wikipedia dictionary and then of the Twitter
    dictionary, the w2t and t2w distance scores are paired with the RBO score
    by combine_dict_value() (no averaging) and drawn as a scatter plot.  Four
    PNG files are written into the current working directory.

    Every read_in_score() call also writes a sorted copy of what it read next
    to its input file.  The number of scores loaded and the name of each plot
    are printed as the function goes.

    :param rbo_result_file: name of the RBO result file inside ``../data/``.
    :param out_suffix: suffix appended to every output file name.
    """
    num = 5000
    rbo_path = '../data/' + rbo_result_file
    w2t_path = '../data/w2t_distance_to_median'
    t2w_path = '../data/t2w_distance_to_median'

    def load_scores(path, corpus_dict, corpus, label):
        # Load one score set and report how many terms survived the filter.
        scores = read_in_score(path, corpus_dict, corpus, num)
        print('%d %s' % (len(scores), label))
        return scores

    def scatter_to_png(stage, x_scores, y_scores, x_label, out_name):
        # Announce, draw and save one scatter plot.
        print(stage)
        xs, ys = combine_dict_value(x_scores, y_scores)
        pyplot.plot(xs, ys, '.')
        pyplot.xlabel(x_label)
        pyplot.ylabel('RBO')
        # ``type`` is not a savefig keyword (newer matplotlib raises TypeError
        # on unknown keywords), so it is dropped.  The '.png' extension is
        # spelled out so the format does not rely on matplotlib's default.
        pyplot.savefig(out_name + '.png')
        pyplot.close()

    # ---- terms taken from the Wikipedia dictionary ----
    wiki_dict = read_in_corpus_dict(wiki_dict_path, top_num=num)
    rbo_wiki = load_scores(rbo_path, wiki_dict, 'wiki', 'RBO wiki')
    w2t_wiki = load_scores(w2t_path, wiki_dict, 'wiki', 'W2T wiki')
    t2w_wiki = load_scores(t2w_path, wiki_dict, 'wiki', 'T2W wiki')

    scatter_to_png(
        'w2t rbo wiki', w2t_wiki, rbo_wiki,
        'Wikipedia project to Twitter adjusted distance(Top 5000 in Wikipedia)',
        'w2t_rbo_wiki_' + out_suffix)
    scatter_to_png(
        't2w rbo wiki', t2w_wiki, rbo_wiki,
        'Twitter project to Wikipedia adjusted distance(Top in Wikipedia)',
        't2w_rbo_wiki_' + out_suffix)

    # ---- terms taken from the Twitter dictionary ----
    tweet_dict = read_in_corpus_dict(tweet_dict_path, top_num=num)
    rbo_tweet = load_scores(rbo_path, tweet_dict, 'tweet', 'RBO Twitter')
    w2t_tweet = load_scores(w2t_path, tweet_dict, 'tweet', 'W2T Twitter')
    t2w_tweet = load_scores(t2w_path, tweet_dict, 'tweet', 'T2W Twitter')

    scatter_to_png(
        'w2t rbo tweet', w2t_tweet, rbo_tweet,
        'Wikipedia project to Twitter adjusted distance(Top in Twitter)',
        'w2t_rbo_tweet_' + out_suffix)
    scatter_to_png(
        't2w rbo tweet', t2w_tweet, rbo_tweet,
        'Twitter project to Wikipedia adjusted distance(Top in Twitter)',
        't2w_rbo_tweet_' + out_suffix)

### No longer used: nothing in this file calls plotting_with_range().
def plotting_with_range():
    """Scatter-plot per-term RBO against the ``_from5to7`` distance scores.

    For the top 5000 terms of the Wikipedia dictionary and then of the Twitter
    dictionary, the w2t and t2w ``..._distance_to_median_from5to7`` scores are
    paired with the RBO scores from ``../data/rbo_w2t_t2w`` by
    combine_dict_value() and drawn as a scatter plot.  Four PNG files are
    written into the current working directory.

    Every read_in_score() call also writes a sorted copy of what it read next
    to its input file.  For each RBO score set the number of zero scores is
    printed, followed by the number of scores loaded.
    """
    num = 5000
    rbo_path = '../data/rbo_w2t_t2w'
    w2t_path = '../data/w2t_distance_to_median_from5to7'
    t2w_path = '../data/t2w_distance_to_median_from5to7'
    out_tail = '_top' + str(num) + '_from5to7'

    def load_scores(path, corpus_dict, corpus, label, count_zeros=False):
        # Load one score set; optionally report how many scores are zero.
        scores = read_in_score(path, corpus_dict, corpus, num)
        if count_zeros:
            print(sum(1 for value in scores.values() if value == 0))
        print('%d %s' % (len(scores), label))
        return scores

    def scatter_to_png(x_scores, y_scores, x_label, out_name):
        # Draw and save one scatter plot.
        xs, ys = combine_dict_value(x_scores, y_scores)
        pyplot.plot(xs, ys, '.')
        pyplot.xlabel(x_label)
        pyplot.ylabel('RBO')
        # ``type`` is not a savefig keyword (newer matplotlib raises TypeError
        # on unknown keywords), so it is dropped.  The '.png' extension is
        # spelled out so the format does not rely on matplotlib's default.
        pyplot.savefig(out_name + '.png')
        pyplot.close()

    # ---- terms taken from the Wikipedia dictionary ----
    wiki_dict = read_in_corpus_dict(wiki_dict_path, top_num=num)
    rbo_wiki = load_scores(rbo_path, wiki_dict, 'wiki', 'RBO wiki', count_zeros=True)
    w2t_wiki = load_scores(w2t_path, wiki_dict, 'wiki', 'W2T wiki')
    t2w_wiki = load_scores(t2w_path, wiki_dict, 'wiki', 'T2W wiki')

    scatter_to_png(
        w2t_wiki, rbo_wiki,
        'Wikipedia project to Twitter adjusted distance(Top 5000 in Wikipedia)',
        'w2t_rbo_wiki' + out_tail)
    scatter_to_png(
        t2w_wiki, rbo_wiki,
        'Twitter project to Wikipedia adjusted distance(Top in Wikipedia)',
        't2w_rbo_wiki' + out_tail)

    # ---- terms taken from the Twitter dictionary ----
    tweet_dict = read_in_corpus_dict(tweet_dict_path, top_num=num)
    rbo_tweet = load_scores(rbo_path, tweet_dict, 'tweet', 'RBO Twitter', count_zeros=True)
    w2t_tweet = load_scores(w2t_path, tweet_dict, 'tweet', 'W2T Twitter')
    t2w_tweet = load_scores(t2w_path, tweet_dict, 'tweet', 'T2W Twitter')

    scatter_to_png(
        w2t_tweet, rbo_tweet,
        'Wikipedia project to Twitter adjusted distance(Top in Twitter)',
        'w2t_rbo_tweet' + out_tail)
    scatter_to_png(
        t2w_tweet, rbo_tweet,
        'Twitter project to Wikipedia adjusted distance(Top in Twitter)',
        't2w_rbo_tweet' + out_tail)


def plotting_adjusted_rbo(folder, common_num, num):
    """Scatter-plot per-term adjusted RBO against w2t / t2w distance scores.

    For the top ``num`` terms of the Wikipedia dictionary and then of the
    Twitter dictionary, the w2t and t2w distance scores found in ``folder``
    are paired with the adjusted RBO scores by combine_dict_value() and drawn
    as a scatter plot.  Four PNG files are written into the current working
    directory.

    Every read_in_score() call also writes a sorted copy of what it read next
    to its input file.  The number of scores loaded and the name of each plot
    are printed as the function goes.

    :param folder: directory prefix of the input files; it is concatenated
        directly with the file names, so it must end with a path separator.
    :param common_num: value used in the ``common_<n>`` part of the RBO input
        file name and of the output file names.
    :param num: number of top dictionary terms to keep; also part of the file
        names.
    """
    run_tag = 'common_' + str(common_num) + '_top' + str(num)
    rbo_path = folder + 'adjusted_rbo_w2t_t2w_' + run_tag
    w2t_path = folder + 'w2t_distance_to_median'
    t2w_path = folder + 't2w_distance_to_median'

    def load_scores(path, corpus_dict, corpus, label):
        # Load one score set and report how many terms survived the filter.
        scores = read_in_score(path, corpus_dict, corpus, num)
        print('%d %s' % (len(scores), label))
        return scores

    def scatter_to_png(stage, x_scores, y_scores, x_label, out_name):
        # Announce, draw and save one scatter plot.
        print(stage)
        xs, ys = combine_dict_value(x_scores, y_scores)
        pyplot.plot(xs, ys, '.')
        pyplot.xlabel(x_label)
        pyplot.ylabel('RBO')
        # ``type`` is not a savefig keyword (newer matplotlib raises TypeError
        # on unknown keywords), so it is dropped.  The '.png' extension is
        # spelled out so the format does not rely on matplotlib's default.
        pyplot.savefig(out_name + '.png')
        pyplot.close()

    # ---- terms taken from the Wikipedia dictionary ----
    wiki_dict = read_in_corpus_dict(wiki_dict_path, top_num=num)
    rbo_wiki = load_scores(rbo_path, wiki_dict, 'wiki', 'RBO wiki')
    w2t_wiki = load_scores(w2t_path, wiki_dict, 'wiki', 'W2T wiki')
    t2w_wiki = load_scores(t2w_path, wiki_dict, 'wiki', 'T2W wiki')

    # The label used to hard-code "Top 5000"; it now follows ``num`` and is
    # unchanged for num=5000.
    scatter_to_png(
        'w2t rbo wiki', w2t_wiki, rbo_wiki,
        'Wikipedia project to Twitter adjusted distance(Top %s in Wikipedia)' % num,
        'w2t_adjusted_rbo_wiki_' + run_tag)
    scatter_to_png(
        't2w rbo wiki', t2w_wiki, rbo_wiki,
        'Twitter project to Wikipedia adjusted distance(Top in Wikipedia)',
        't2w_adjusted_rbo_wiki_' + run_tag)

    # ---- terms taken from the Twitter dictionary ----
    tweet_dict = read_in_corpus_dict(tweet_dict_path, top_num=num)
    rbo_tweet = load_scores(rbo_path, tweet_dict, 'tweet', 'RBO Twitter')
    w2t_tweet = load_scores(w2t_path, tweet_dict, 'tweet', 'W2T Twitter')
    t2w_tweet = load_scores(t2w_path, tweet_dict, 'tweet', 'T2W Twitter')

    scatter_to_png(
        'w2t rbo tweet', w2t_tweet, rbo_tweet,
        'Wikipedia project to Twitter adjusted distance(Top in Twitter)',
        'w2t_adjusted_rbo_tweet_' + run_tag)
    scatter_to_png(
        't2w rbo tweet', t2w_tweet, rbo_tweet,
        'Twitter project to Wikipedia adjusted distance(Top in Twitter)',
        't2w_adjusted_rbo_tweet_' + run_tag)
if __name__ == '__main__':
    # Script entry point.  The only live code is the call to
    # plotting_average_rbo_distance() at the bottom of this block.
    #
    # Disabled experiment 1: 2-D histogram heat map of random test data.
    # import numpy as np
    # import numpy.random
    # import matplotlib.pyplot as plt
    #
    # # Generate some test data
    # x = np.random.randn(8873)
    # y = np.random.randn(8873)
    #
    # heatmap, xedges, yedges = np.histogram2d(x, y, bins=50)
    # extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
    #
    # plt.clf()
    # plt.imshow(heatmap, extent=extent)
    # plt.show()
    #
    # raw_input()
    # Disabled experiment 2: the same heat map for w2t distance vs. RBO
    # (Wikipedia dictionary).
    # num = 5000
    # wiki_dict = read_in_corpus_dict(wiki_dict_path, top_num=num)
    #
    # rbo_wiki = read_in_score('../data/' + 'rbo_w2t_t2w', wiki_dict, 'wiki', num)
    # print len(rbo_wiki.keys()), 'RBO wiki'
    # w2t_wiki = read_in_score('../data/w2t_distance_to_median', wiki_dict, 'wiki', num)
    # print len(w2t_wiki.keys()), 'W2T wiki'
    #
    #
    # w2t_x, rbo_y = combine_dict_value(w2t_wiki, rbo_wiki)
    # #pyplot.plot(w2t_x, rbo_y, '.')
    # heatmap, xedges, yedges = np.histogram2d(w2t_x, rbo_y, bins=100)
    # extent = [xedges[0], xedges[1], yedges[0], yedges[1]]
    #
    # pyplot.clf()
    # pyplot.imshow(heatmap, extent=extent)
    # pyplot.show()




    # Name of the RBO result file; the plotting function prefixes '../data/'.
    rbo_result = 'rbo_w2t_t2w'
    # Suffix appended to every output file name written by the call below.
    suffix = 'avgbyfixsize100_step10'
    plotting_average_rbo_distance(rbo_result, suffix)
    # Disabled alternative run.  NOTE: plotting_adjusted_rbo() takes
    # (folder, common_num, num); the call below lacks the folder argument.
    # common = 1000
    # k = 5000
    # plotting_adjusted_rbo(common, k)
