import numpy as np
import pickle
import codecs
from collections import defaultdict
from scipy.stats import spearmanr
from sklearn import preprocessing
import pickle
from collections import defaultdict
# Module-level mapping whose missing keys default to an empty list.
# NOTE(review): nothing in the visible part of this file reads or writes it —
# confirm whether other code still depends on it.
final_dict=defaultdict(list)
from gensim.models import Word2Vec
import gensim.downloader
from glove import Corpus, Glove
from gensim.models import FastText
from scipy.stats import spearmanr
from sklearn import preprocessing
from sklearn.metrics.pairwise import cosine_similarity
from scipy import spatial
from scipy.stats import kendalltau

class embedding_evaluation:
    """Benchmark precomputed pair scores against Word2Vec, FastText and GloVe.

    The precomputed scores (labelled "TM" in the printed output) are looked
    up per word pair and compared with the gold scores of a word-similarity
    dataset, next to the cosine similarities produced by the three
    embedding models.
    """

    def __init__(self, w2v_path="word2vec_billion.model",
                 fasttext_path="billion_fasttext.model",
                 glove_path="billion_glove_updated.model"):
        """Load the three embedding models from disk.

        Args:
            w2v_path: file passed to ``Word2Vec.load``.
            fasttext_path: file passed to ``FastText.load``.
            glove_path: file passed to ``Glove.load``.

        The defaults are the file names that used to be hard-coded, so
        calling the constructor without arguments behaves as before.
        """
        self.model = Word2Vec.load(w2v_path)
        self.model_fast = FastText.load(fasttext_path)
        self.model_glove = Glove.load(glove_path)

    def load_embedding(self, path):
        """Unpickle and return the object stored in ``<path>final_dict.pkl``.

        Args:
            path: prefix that is concatenated (without a separator) with
                ``'final_dict.pkl'``; a directory prefix therefore needs its
                trailing slash.

        Returns:
            The unpickled object. ``load_evaluation`` uses it as a mapping
            keyed by ``(word1, word2)`` tuples.
        """
        # SECURITY: pickle can execute arbitrary code while loading; only
        # point this at files produced by a trusted source.
        with open(str(path) + 'final_dict.pkl', "rb") as saved:
            return pickle.load(saved)

    def load_dataset(self, path):
        """Read a whitespace-separated word-pair dataset from ``<path>.txt``.

        The first line of the file is skipped (treated as a header) and
        blank lines are ignored. Every other line must provide at least
        three fields: two words and a score.

        Args:
            path: file path without the ``.txt`` extension.

        Returns:
            A list of ``((word_i, word_j), score)`` tuples with lower-cased
            words and ``float`` scores, in file order.

        Raises:
            IndexError: a non-blank line has fewer than three fields.
            ValueError: the third field is not a valid float.
        """
        # For this specific dataset the last two characters of each word are
        # dropped — presumably a part-of-speech suffix such as "-n"; confirm
        # against the dataset file.
        strip_suffix = path == './dataset/EN-MEN-LEM-TEST'

        pair_list = []
        with open(str(path) + ".txt", 'r', encoding='utf-8') as fread_simlex:
            next(fread_simlex, None)  # skip the first (header) line
            for line in fread_simlex:
                tokens = line.split()
                if not tokens:
                    # A blank line (e.g. at the end of the file) carries no
                    # pair; skipping it avoids an IndexError below.
                    continue
                word_i = tokens[0].lower()
                word_j = tokens[1].lower()
                if strip_suffix:
                    word_i = word_i[:-2]
                    word_j = word_j[:-2]
                pair_list.append(((word_i, word_j), float(tokens[2])))
        return pair_list

    def load_evaluation(self, pair_list, profile_dict_word1):
        """Print correlation statistics for every scoring method.

        A pair is evaluated only when it is a key of ``profile_dict_word1``
        and both of its words are in the Word2Vec and the GloVe vocabulary,
        so that all score lists stay aligned with the gold scores.

        Args:
            pair_list: iterable of ``((word1, word2), gold_score)`` tuples,
                as returned by ``load_dataset``.
            profile_dict_word1: mapping from ``(word1, word2)`` to the
                precomputed score for that pair (assumed to be a single
                number — confirm against the producer of the pickle).

        Returns:
            None. Spearman, cosine, Pearson and Kendall statistics are
            printed to stdout.
        """
        # A set gives O(1) membership tests; scanning the index_to_key list
        # for every word costs O(vocabulary size) each time.
        w2v_vocab = set(self.model.wv.index_to_key)
        glove_index = self.model_glove.dictionary
        glove_vectors = self.model_glove.word_vectors

        original_score = []
        calculated_score = []
        calculated_w2v = []
        calculated_fast = []
        calculated_glove = []

        for pair, gold in pair_list:
            if pair not in profile_dict_word1:
                continue
            word1, word2 = pair
            if word1 not in w2v_vocab or word2 not in w2v_vocab:
                continue
            if word1 not in glove_index or word2 not in glove_index:
                # Only the Word2Vec vocabulary used to be checked, so a word
                # unknown to GloVe raised KeyError on the lookup below.
                continue

            # scipy's cosine distance expects 1-D vectors, so the rows are
            # passed directly instead of being wrapped in a list.
            glove_score = 1 - spatial.distance.cosine(
                glove_vectors[glove_index[word1]],
                glove_vectors[glove_index[word2]],
            )

            original_score.append(gold)
            calculated_score.append(profile_dict_word1[pair])
            calculated_w2v.append(self.model.wv.similarity(word1, word2))
            calculated_fast.append(self.model_fast.wv.similarity(word1, word2))
            calculated_glove.append(glove_score)

        print(f'original score {len(original_score)} calculated w2v {len(calculated_score)} calculated fast {len(calculated_fast)} Glove {len(calculated_glove)}')

        if not original_score:
            # The statistics below are undefined (or raise) on empty input.
            print('no word pairs could be evaluated')
            print('------------------------------------------------------------------------- ')
            return

        spearman_TM = round(spearmanr(original_score, calculated_score)[0], 3)
        spearman_w2v = round(spearmanr(original_score, calculated_w2v)[0], 3)
        spearman_fast = round(spearmanr(original_score, calculated_fast)[0], 3)
        spearman_glove = round(spearmanr(original_score, calculated_glove)[0], 3)
        # TM vs Word2Vec, matching the label and the cosine comparison below
        # (this used to correlate the GloVe scores with Word2Vec instead).
        spearman_tm_w2v = round(spearmanr(calculated_score, calculated_w2v)[0], 3)
        print(f'spearman TM {spearman_TM} spearman W2V {spearman_w2v} spearman Fast {spearman_fast} spearman_glove {spearman_glove}, spearman_tm_w2v {spearman_tm_w2v}')

        # Each call receives two score vectors as rows and yields a 2x2
        # matrix whose off-diagonal entries are their cosine similarity.
        similarity = cosine_similarity([original_score, calculated_score])
        similarity_w2v = cosine_similarity([original_score, calculated_w2v])
        similarity_fast = cosine_similarity([original_score, calculated_fast])
        similarity_glove = cosine_similarity([original_score, calculated_glove])
        similarity_tm_w2v = cosine_similarity([calculated_score, calculated_w2v])
        print(f'Cosine TM {similarity} Consine w2v {similarity_w2v}  Cosine Fast {similarity_fast} Cosine tm_w2v {similarity_tm_w2v} Cosine glove{similarity_glove}')

        TM_corr = np.corrcoef(original_score, calculated_score)
        w2v_corr = np.corrcoef(original_score, calculated_w2v)
        fast_corr = np.corrcoef(original_score, calculated_fast)
        glove_corr = np.corrcoef(original_score, calculated_glove)
        print(f'pearson TM {TM_corr} pearson w2v {w2v_corr} pearson fast {fast_corr} pearson glove{glove_corr}')

        kendal_TM, _ = kendalltau(original_score, calculated_score)
        kendal_w2v, _ = kendalltau(original_score, calculated_w2v)
        kendal_fast, _ = kendalltau(original_score, calculated_fast)
        kendal_glove, _ = kendalltau(original_score, calculated_glove)
        print(f'kendal TM {kendal_TM} kendal W2V {kendal_w2v} Kendal fast {kendal_fast} kendal glove{kendal_glove}')
        print('------------------------------------------------------------------------- ')
        return


