import sys
import seaborn

def _read_scores(path):
    """Return the scores stored one per line in *path* as a list of floats."""
    # The context manager closes the file as soon as it has been parsed,
    # instead of leaving the handle open until garbage collection.
    with open(path) as score_file:
        return [float(line.strip('\n')) for line in score_file]


# mod_scores[k] / org_scores[k] hold the per-example scores read from
# 'only_corr_<type>.txt' and 'only_corr_<type>_org.txt' for the k-th
# perturbation type; legend[k] is the display label for that type.
mod_scores = []
org_scores = []
legend = ['negation', 'modality', 'future', 'text infilling']
# Variant that also includes the polyjuice negation set:
#legend = ['negation', 'negation-polyjuice', 'modality', 'future', 'text infilling']
#for t in ['negation_auto', 'poly-negation_auto', 'modality_auto', 'future_auto', 'ilm']:
for t in ['negation_auto', 'modality_auto', 'future_auto', 'ilm']:
    mod_scores.append(_read_scores('only_corr_' + t + '.txt'))
    org_scores.append(_read_scores('only_corr_%s_org.txt' % t))
    # The two files are paired line by line further down, so a length mismatch
    # must stop the script. An explicit raise (unlike assert) survives `-O`.
    if len(mod_scores[-1]) != len(org_scores[-1]):
        raise ValueError(
            'score count mismatch for %r: %d modified vs %d original'
            % (t, len(mod_scores[-1]), len(org_scores[-1]))
        )
# Earlier single-pair input variants, kept for reference:
#mod_score = [float(l.strip('\n')) for l in open('../confidence_scores/adv/auto/mod_'+sys.argv[1]+'_model_score.txt')]
#org_score = [float(l.strip('\n')) for l in open('../confidence_scores/org/auto/mod_'+ sys.argv[1]+'_model_score_org.txt')]
#mod_score = [float(l.strip('\n')) for l in open(sys.argv[1])]
#org_score = [float(l.strip('\n')) for l in open(sys.argv[2])]

# diffs[k] holds, for the k-th perturbation type, the per-example ratio of the
# modified score to the original score.
diffs = []
for mod_score, org_score in zip(mod_scores, org_scores):
    # Alternative metric (relative change in percent):
    #    ((mod - org) / org) * 100
    ratios = [mod / org for mod, org in zip(mod_score, org_score)]

    # Report the mean ratio for this perturbation type.
    print(sum(ratios) / len(ratios))
    diffs.append(ratios)
    # Optional dump of the ratios to disk:
    #fw = open('only_corr_diff_' + sys.argv[1] + '.txt', 'w')
    #for l in ratios:
        #fw.write(str(l) + '\n')

import matplotlib.pyplot as plt
import numpy as np
# Draw one kernel-density curve per perturbation type over the natural log of
# the score ratios in `diffs`, then save the figure as a PNG.
fig, ax = plt.subplots()
#ax.set_xlim(-150, 1000, 1)
# Histogram settings matching the commented-out plt.hist alternative below;
# the KDE rendering does not use them.
kw = {
        'bins': 140,
        'histtype': 'step',
        'range': (-8, 6),
    }
for ratios, label in zip(diffs, legend):
    # seaborn.distplot is deprecated; with hist=False it only drew the KDE
    # curve, which the axes-level kdeplot draws directly. The target axes are
    # passed explicitly rather than relying on the "current axes" state.
    seaborn.kdeplot(np.log(ratios), label=label, ax=ax)

    # Histogram alternative:
    #plt.hist(np.array(np.log(ratios)), 140, histtype='step', range=(-8,6), label=label, density=True)
#plt.ylabel('% of examples')
plt.ylabel('probability density')
#plt.xlabel('confidence difference (in percentage)')
plt.xlabel('calibration score: pert./org. (in log scale)')
plt.legend()
# Dashed reference line at log-ratio 0, i.e. where the two scores are equal.
ax.axvline(x=0, c='black', lw=1, linestyle='--')
plt.savefig('all_seaborn_proportion_1.png')

