import numpy as np
import pickle
import gensim
import xlrd
#glove_model = KeyedVectors.load_word2vec_format(word2vec_output_file, binary=False)


# Embedding model in word2vec text format. Its keys are the form ids that
# networksimi() and getcidsimi() pass to ``cfmodel.similarity``.
cfmodel = gensim.models.KeyedVectors.load_word2vec_format("../data/cfid2vec_epoch90_2895_2543.txt", binary=False)

# NOTE: pickle.load can execute arbitrary code; only load trusted data files.
# The context managers guarantee the handles are closed even if loading fails.
# Lookup table: integer index -> form id (presumably 2912 entries).
with open('../data/cfcount2cfid_dic.pkl', 'rb') as df:
    cfcount2cfidExcel_dic = pickle.load(df)

# Lookup table: integer index -> character (presumably 2912 entries).
with open('../data/cfcount2character_dic.pkl', 'rb') as df:
    cfcount2characterExcel_dic = pickle.load(df)


# Character-level tables, filled in place by readJia2().
index2ccharacter = {}  # character index -> character
index2cid = {}         # character index -> character id (form id minus last char)
cid2id = {}            # character id -> list of form ids sharing that prefix
index2duti = {}        # character index -> 1 if a row's component cell had no comma, else 0
index2duti[0] = 0

def readJia2():
    """Fill the module-level character tables from the oracle-radical workbook.

    Every row after the first of sheet '甲骨文-部件对应表' is read. Column 0
    holds an underscore-separated identifier whose 4th and 5th fields are
    concatenated into a form id; the form id without its last character is
    the character id. Column 1 is the character and column 3 the component
    list.

    Side effects (nothing is returned):
      * ``cid2id``: character id -> list of its form ids, in row order.
      * ``index2cid`` / ``index2ccharacter``: running index -> character id /
        character, assigned the first time a character id is seen.
      * ``index2duti``: set to 1 at the current running index when the
        component cell contains no comma, and reset to 0 for the next index
        whenever a new character is registered.

    NOTE(review): for a row whose character id is already registered, the
    running index refers to the next, not yet registered slot, so the
    single-component flag is written there — confirm this is intended.
    """
    workbook = xlrd.open_workbook('../data/20210520_oracle-radical.xlsx')
    # Locate the worksheet by its name.
    sheet = workbook.sheet_by_name('甲骨文-部件对应表')
    next_index = 0  # index the next previously unseen character will receive

    # Row 0 is skipped, so start at 1.
    for row_number in range(1, sheet.nrows):
        row = sheet.row_values(row_number)
        fields = row[0].split("_")
        form_id = fields[3] + fields[4]
        character = row[1]
        components = row[3]

        # A cell without a comma lists exactly one component.
        if "," not in components:
            index2duti[next_index] = 1

        char_id = form_id[:-1]
        if char_id in cid2id:
            # Another form of a character that is already registered.
            cid2id[char_id].append(form_id)
            continue

        cid2id[char_id] = [form_id]
        index2cid[next_index] = char_id
        index2ccharacter[next_index] = character
        next_index += 1
        index2duti[next_index] = 0


def networksimi(cfmodel, cfcount2cfidExcel_dic, size=2912):
    """Build a symmetric matrix of pairwise embedding similarities.

    Args:
        cfmodel: object exposing ``similarity(id1, id2)`` (e.g. gensim
            KeyedVectors).
        cfcount2cfidExcel_dic: mapping from integer matrix index to the id
            passed to ``cfmodel.similarity``.
        size: number of rows/columns of the result. Defaults to 2912, the
            value that used to be hard-coded.

    Returns:
        A ``(size, size)`` float array. Entry ``[i][j]`` holds the similarity
        of the ids mapped from ``i`` and ``j``; the diagonal and every pair
        that could not be scored stay 0.
    """
    cpair2LCSsimi_np = np.zeros((size, size))
    idlist = list(cfcount2cfidExcel_dic.keys())
    print(idlist)
    print(cfcount2cfidExcel_dic)
    for k, index1 in enumerate(idlist[:-1]):
        id1 = cfcount2cfidExcel_dic[index1]
        # Only visit each unordered pair once; the matrix is mirrored below.
        for index2 in idlist[k + 1:]:
            id2 = cfcount2cfidExcel_dic[index2]
            try:
                sim = cfmodel.similarity(id1, id2)
                cpair2LCSsimi_np[index1][index2] = sim
                cpair2LCSsimi_np[index2][index1] = sim
            except Exception:
                # Best effort: a pair that cannot be scored (typically an id
                # missing from the model) keeps similarity 0. Unlike a bare
                # ``except:``, this lets KeyboardInterrupt/SystemExit through
                # so the long-running loop can still be aborted.
                continue

    return cpair2LCSsimi_np


def getcidsimi(cid1,cid2):
    """Return the highest similarity between any forms of two characters.

    Looks up the form ids registered for ``cid1`` and ``cid2`` in ``cid2id``
    and scores every cross pair with ``cfmodel.similarity``. The result is
    the largest score found, or -1 when no pair scores above -1 (including
    when either form list is empty). Missing character ids and errors raised
    by the model propagate to the caller.
    """
    forms_a = cid2id[cid1]
    forms_b = cid2id[cid2]
    # Seed with -1 so it is returned when nothing beats it; max() keeps the
    # earliest of equal values, matching a strict "greater than" update.
    candidates = [-1]
    candidates.extend(cfmodel.similarity(a, b) for a in forms_a for b in forms_b)
    return max(candidates)


def clevelnetworksimi(filterRadical=True):
    """Build the character-level similarity matrix from the embedding model.

    For every pair of character indices below 2543 the score is
    ``getcidsimi`` of the two character ids found in ``index2cid``. Pairs
    that cannot be scored keep similarity 0, as does the diagonal.

    Args:
        filterRadical: when truthy, load '../data/results/jia_RLCS_simi_np.npy'
            and multiply each score by 1 where that matrix is positive and by
            0.4 elsewhere.

    Returns:
        A symmetric ``(2543, 2543)`` float array (symmetric before and after
        the optional weighting only if the loaded matrix is symmetric).
    """
    # NOTE(review): presumably the number of characters registered by
    # readJia2(); it must also match the shape of the radical matrix below.
    size = 2543
    cpair2LCSsimi_np = np.zeros((size, size))
    for row in range(size - 1):
        for col in range(row + 1, size):
            try:
                sim = getcidsimi(index2cid[row], index2cid[col])
            except Exception:
                # Best effort: unknown indices/ids leave the pair at 0. A bare
                # ``except:`` here would also swallow KeyboardInterrupt and
                # make this multi-million-iteration loop impossible to abort.
                continue
            cpair2LCSsimi_np[row][col] = sim
            cpair2LCSsimi_np[col][row] = sim

    if filterRadical:
        cpair2LCSsimi_np1 = np.load('../data/results/jia_RLCS_simi_np.npy')
        # Keep the score where the loaded matrix is positive, damp it otherwise.
        cpair2LCSsimi_np1 = np.where(cpair2LCSsimi_np1 > 0, 1, 0.4)
        cpair2LCSsimi_np = cpair2LCSsimi_np * cpair2LCSsimi_np1

    return cpair2LCSsimi_np





def similarityRank(num,temp):
    """Return the ``num`` largest entries of every row, best first.

    Args:
        num: how many top-ranked entries to extract per row.
        temp: 2-D array-like of scores. It is copied, so the caller's data
            is left untouched.

    Returns:
        ``(max_value, max_index)``: two arrays of shape ``(rows, num)`` holding
        the selected scores and their column indices. Ties go to the lowest
        column index (``np.argmax`` behaviour).

    Note:
        Already selected entries are masked with -1, so the ranking is only
        meaningful while the remaining scores are greater than -1.
    """
    # Work on a private copy: the masking below must not corrupt the input.
    scores = np.array(temp)
    rows = np.arange(scores.shape[0])

    max_value = []
    max_index = []
    for _ in range(num):
        best_cols = np.argmax(scores, axis=1)
        # Fancy indexing returns a copy, so masking afterwards is safe.
        max_value.append(scores[rows, best_cols])
        scores[rows, best_cols] = -1
        max_index.append(best_cols)

    max_index = np.array(max_index)
    max_value = np.array(max_value)
    return max_value.T, max_index.T


def showoutput(max_index,max_value,topk):
    """Turn ranked indices into readable neighbour lists.

    For each row ``n`` of ``max_index`` the first ``topk`` columns are paired
    with the matching scores from ``max_value`` and translated through the
    module-level tables ``cfcount2cfidExcel_dic`` and
    ``cfcount2characterExcel_dic``.

    Returns:
        ``(cid2mappings, c2mappings)``: dicts keyed by the translated id /
        character of row ``n``, each mapping to a list of
        ``(neighbour, score)`` tuples in rank order. A key that occurs for
        several rows keeps the list of the last such row.
    """
    id_of = cfcount2cfidExcel_dic
    char_of = cfcount2characterExcel_dic

    cid2mappings = {}
    c2mappings = {}
    for row in range(len(max_index)):
        cid_key = id_of[row]
        char_key = char_of[row]
        ranked = [(max_index[row][rank], max_value[row][rank]) for rank in range(topk)]
        cid2mappings[cid_key] = [(id_of[col], score) for col, score in ranked]
        c2mappings[char_key] = [(char_of[col], score) for col, score in ranked]
    return cid2mappings, c2mappings

def showoutput1(max_index,max_value,topk,level=0):
    """Turn ranked indices into readable neighbour lists at a chosen level.

    Args:
        max_index: per-row column indices, best first.
        max_value: scores aligned with ``max_index``.
        topk: number of leading columns to report per row.
        level: 0 translates indices through ``cfcount2cfidExcel_dic`` /
            ``cfcount2characterExcel_dic``; any other value uses
            ``index2cid`` / ``index2ccharacter``.

    Returns:
        ``(cid2mappings, c2mappings)``: dicts keyed by the id / character of
        each row, mapping to ``(neighbour, score)`` tuples in rank order. A
        key that occurs for several rows keeps the list of the last such row.
    """
    # Pick the pair of lookup tables once; the loop is the same for both levels.
    if level == 0:
        id_of, char_of = cfcount2cfidExcel_dic, cfcount2characterExcel_dic
    else:
        id_of, char_of = index2cid, index2ccharacter

    cid2mappings = {}
    c2mappings = {}
    for row in range(len(max_index)):
        cid_key = id_of[row]
        char_key = char_of[row]
        ranked = [(max_index[row][rank], max_value[row][rank]) for rank in range(topk)]
        cid2mappings[cid_key] = [(id_of[col], score) for col, score in ranked]
        c2mappings[char_key] = [(char_of[col], score) for col, score in ranked]
    return cid2mappings, c2mappings








# Driver: runs at import time. Fills the character-level tables first, since
# clevelnetworksimi() and showoutput1(level=1) read them.
readJia2()

# Build the character-level network similarity matrix.
cpair2Figuresimi_np1=clevelnetworksimi(filterRadical=True)
# Saving the matrix is currently disabled:
#np.save('../data/results/jia_pic_simi_np', cpair2Figuresimi_np1)


# Output the character-level results: top 100 neighbours per character.
max_value, max_index=similarityRank(100,cpair2Figuresimi_np1)
cid2mappings,c2mappings=showoutput1(max_index,max_value,100,level=1)
print("----------------------------------------------------")
# Spot-check the neighbour lists of three sample characters.
print(c2mappings['鼎'])
print(c2mappings['宿'])
print(c2mappings['月'])















