import pickle
import numpy as np
import xlrd
import gensim
import xlwt
#打开excel
from sklearn import preprocessing

import xlsxwriter as xw


# Module-level lookup tables. They start empty and are filled in by
# readJia1() (radical tables) and readJia2() (all the others).

# Radical id -> radical ("word" in these names means a radical), and back.
id2word={}
word2id={}
# Row id (fields 3 and 4 of the sheet's first column, concatenated) -> the
# value of the sheet's second column (the character, judging by the name).
id2character={}
# Row id -> the row's radicals concatenated into one string.
id2radicalstring={}
# Running row counter (0-based, header excluded) -> character of that row.
index2character={}
# Running group counter -> character of the first row seen for that group.
index2ccharacter={}
# Running row counter -> row id.
index2id={}
# Running group counter -> group id (the row id without its last character).
index2cid={}
# Row id -> running row counter.
id2index={}
# Character -> running row counter (a later row with the same character
# overwrites an earlier one).
character2index={}
# Running row counter -> concatenated radicals of that row.
index2radicalstring={}
#r12r2={}
# Group id -> list of the row ids that share it.
cid2id={}







def readJia():
    """Load the related-component pairs from the oracle-radical workbook.

    Every row of the '相关部件对' sheet is used (the first row is NOT skipped).
    The first two cells of a row are joined with a single space; both cells
    must therefore be strings.

    Returns:
        list: one "<cell 0> <cell 1>" string per sheet row, in sheet order.
    """
    workbook = xlrd.open_workbook('../data/20210520_oracle-radical.xlsx')
    # Locate the worksheet by its name.
    sheet = workbook.sheet_by_name('相关部件对')

    pairs = []
    for cells in map(sheet.row_values, range(sheet.nrows)):
        left, right = cells[0], cells[1]
        pairs.append(left + " " + right)
    return pairs

def readJia1():
    """Fill the module-level ``id2word`` / ``word2id`` tables from the workbook.

    Reads the '部件表' sheet, skipping its first row. For every other row the
    id is the second "_"-separated field of the first cell and the radical is
    the second cell. Nothing is returned; the two dictionaries are updated in
    place.
    """
    workbook = xlrd.open_workbook('../data/20210520_oracle-radical.xlsx')
    # Locate the worksheet by its name.
    sheet = workbook.sheet_by_name('部件表')

    # Start at 1: row 0 is skipped (treated as the header row).
    for row_no in range(1, sheet.nrows):
        cells = sheet.row_values(row_no)
        radical_id = cells[0].split("_")[1]
        radical = cells[1]
        id2word[radical_id] = radical
        word2id[radical] = radical_id


def readJia2(word_pairdic,word2id):
    """Index the character sheet and add character<->radical edges.

    Reads the '甲骨文-部件对应表' sheet (first row skipped). For every row:

    * the row id is fields 3 and 4 of the "_"-separated first cell,
      concatenated;
    * the character is the second cell;
    * the radicals are the comma-separated entries of the fourth cell.

    The module-level tables ``id2character``, ``id2radicalstring``,
    ``index2radicalstring``, ``index2character``, ``character2index``,
    ``index2id``, ``id2index``, ``cid2id``, ``index2cid`` and
    ``index2ccharacter`` are updated in place. The "group id" used by the
    last three is the row id without its final character.

    Args:
        word_pairdic: list of "<a> <b>" edge strings; it is extended in place.
        word2id: mapping radical -> radical id. Radicals missing from it
            produce no edge.

    Returns:
        The same ``word_pairdic`` list, with "<row id> <radical id>" and
        "<radical id> <row id>" appended for every known radical of every
        row, each edge string at most once.
    """
    workbook = xlrd.open_workbook('../data/20210520_oracle-radical.xlsx')
    # Locate the worksheet by its name.
    sheet = workbook.sheet_by_name('甲骨文-部件对应表')

    # Mirror of word_pairdic for O(1) duplicate checks; scanning the list for
    # every candidate edge made the whole pass quadratic.
    seen_pairs = set(word_pairdic)

    row_index = 0    # running index over data rows
    group_index = 0  # running index over distinct group ids
    for sheet_row in range(1, sheet.nrows):  # row 0 is skipped as the header
        cells = sheet.row_values(sheet_row)
        id_fields = cells[0].split("_")
        char_id = id_fields[3] + id_fields[4]
        character = cells[1]
        # split() yields a one-element list when there is no comma.
        radicals = cells[3].split(",")
        radical_string = "".join(radicals)

        id2character[char_id] = character
        id2radicalstring[char_id] = radical_string
        index2radicalstring[row_index] = radical_string
        index2character[row_index] = character
        character2index[character] = row_index
        index2id[row_index] = char_id

        group_id = char_id[:-1]
        if group_id not in cid2id:
            # First row of this group: it also names the group's character.
            cid2id[group_id] = [char_id]
            index2cid[group_index] = group_id
            index2ccharacter[group_index] = character
            group_index += 1
        else:
            cid2id[group_id].append(char_id)

        id2index[char_id] = row_index
        row_index += 1

        for radical in radicals:
            if radical not in word2id:
                continue
            radical_id = word2id[radical]
            # Store the edge in both directions.
            for pair in (char_id + " " + radical_id, radical_id + " " + char_id):
                if pair not in seen_pairs:
                    seen_pairs.add(pair)
                    word_pairdic.append(pair)
    return word_pairdic


def Normalize(data):
    """Mean-normalise *data*: ``(data - mean) / (max - min)``.

    The mean, maximum and minimum are taken over all elements of *data*.

    Args:
        data: array-like of numbers.

    Returns:
        numpy.ndarray with the shape of *data*. Empty input yields an empty
        float array, and input whose values are all equal yields zeros
        (instead of the NaNs a division by a zero range would produce).
    """
    values = np.asarray(data)
    if values.size == 0:
        # np.max / np.min raise on empty input; there is nothing to scale.
        return values.astype(float)

    centered = values - np.mean(values)
    span = np.max(values) - np.min(values)
    if span == 0:
        # Constant input: every element equals the mean.
        return np.zeros_like(centered)
    return centered / span


def similarityRank(num,temp):
    """Return the *num* largest entries of every row of *temp*, best first.

    Args:
        num: how many top-ranked entries to extract per row.
        temp: 2-D array-like of scores, one row per item. It is not modified;
            the selection works on a private float copy.

    Returns:
        ``(max_value, max_index)``: two arrays of shape ``(rows, num)``.
        ``max_value[r][k]`` is the k-th largest score of row ``r`` and
        ``max_index[r][k]`` the column it came from. Equal scores are taken
        in order of increasing column index. If *num* exceeds the number of
        columns, the surplus positions hold ``-inf``.
    """
    # Copy so the caller's matrix survives, and use float so that -inf can
    # mark entries that were already taken (a finite marker such as -1 could
    # outrank genuine scores that are <= -1).
    scores = np.array(temp, dtype=float)
    rows = np.arange(scores.shape[0])

    top_values = []
    top_indices = []
    for _ in range(num):
        best_cols = np.argmax(scores, axis=1)  # axis=1: one winner per row
        top_values.append(scores[rows, best_cols])
        top_indices.append(best_cols)
        scores[rows, best_cols] = -np.inf

    return np.array(top_values).T, np.array(top_indices).T


def showoutput(max_index,max_value,topk,level=0):
    """Turn ranked index/score matrices into readable neighbour lists.

    Args:
        max_index: per row, the indices of its best matches, best first.
        max_value: per row, the scores belonging to ``max_index``.
        topk: number of leading matches to report for every row.
        level: 0 resolves indices through ``index2id`` / ``index2character``;
            any other value uses ``index2cid`` / ``index2ccharacter``.

    Returns:
        ``(cid2mappings, c2mappings)``: two dicts mapping the id, respectively
        the character, of each row to a list of ``(match, score)`` tuples.
        Rows that resolve to the same key overwrite one another; the last row
        wins.
    """
    if level == 0:
        id_of, char_of = index2id, index2character
    else:
        id_of, char_of = index2cid, index2ccharacter

    cid2mappings = {}
    c2mappings = {}
    for row in range(len(max_index)):  # one row per character
        # A fresh list per row, so an earlier row with the same key is replaced.
        id_hits = cid2mappings[id_of[row]] = []
        char_hits = c2mappings[char_of[row]] = []
        for rank in range(topk):
            match = max_index[row][rank]
            id_hits.append((id_of[match], max_value[row][rank]))
            char_hits.append((char_of[match], max_value[row][rank]))
    return cid2mappings, c2mappings

def networksimi(index2radicalstring,path,size=None):
    """Build the symmetric pairwise embedding-similarity matrix of all rows.

    Args:
        index2radicalstring: mapping whose keys are the integer row indices
            to compare (the values are not used).
        path: text-format word2vec file holding one vector per row id.
        size: side length of the returned matrix. By default it is derived
            from the largest index present, so the matrix always fits the
            data; pass a value (e.g. 2912, the former fixed size) to force a
            particular shape. A value too small for the indices raises
            ``IndexError``.

    Returns:
        numpy.ndarray of shape ``(size, size)``. Entry ``[i][j]`` is
        ``cfmodel.similarity(index2id[i], index2id[j])`` for ``i != j``; the
        diagonal is left at 0.
    """
    cfmodel = gensim.models.KeyedVectors.load_word2vec_format(path, binary=False)
    idlist = list(index2radicalstring)
    if size is None:
        size = max(idlist) + 1 if idlist else 0
    cpair2LCSsimi_np = np.zeros((size, size))

    # Visit every unordered pair once and mirror the value.
    for pos, row in enumerate(idlist[:-1]):
        row_key = index2id[row]
        for col in idlist[pos + 1:]:
            sim = cfmodel.similarity(row_key, index2id[col])
            cpair2LCSsimi_np[row][col] = sim
            cpair2LCSsimi_np[col][row] = sim
    return cpair2LCSsimi_np

def getcidsimi(cid1,cid2,cfmodel):
    """Return the best similarity between any member of two groups.

    Args:
        cid1: group id; its members are looked up in ``cid2id``.
        cid2: group id; its members are looked up in ``cid2id``.
        cfmodel: object providing ``similarity(id_a, id_b)``.

    Returns:
        The largest ``cfmodel.similarity(a, b)`` over all members ``a`` of
        *cid1* and ``b`` of *cid2*, or -1 when no pair scores above -1
        (including when either group has no members).
    """
    members1 = cid2id[cid1]
    members2 = cid2id[cid2]
    # The leading -1 is the floor: a score only replaces it when strictly greater.
    return max([-1] + [cfmodel.similarity(a, b) for a in members1 for b in members2])





def clevelnetworksimi(index2radicalstring,path,size=None):
    """Build the symmetric group-level similarity matrix.

    Groups are the entries of the module-level ``index2cid`` table; the score
    of two groups is whatever ``getcidsimi`` returns for their group ids.

    Args:
        index2radicalstring: not used; kept so existing calls keep working.
        path: text-format word2vec file holding one vector per row id.
        size: number of groups to compare, i.e. indices ``0 .. size-1`` of
            ``index2cid``. Defaults to ``len(index2cid)``, which covers every
            group when the indices are the consecutive integers from 0 (as
            ``readJia2`` assigns them). Pass a value (e.g. 2543, the former
            fixed size) to force a particular shape.

    Returns:
        numpy.ndarray of shape ``(size, size)`` with the pairwise scores; the
        diagonal is left at 0.
    """
    cfmodel = gensim.models.KeyedVectors.load_word2vec_format(path, binary=False)
    if size is None:
        size = len(index2cid)
    cpair2LCSsimi_np = np.zeros((size, size))

    # Visit every unordered pair once and mirror the value.
    for row in range(size - 1):
        row_cid = index2cid[row]
        for col in range(row + 1, size):
            sim = getcidsimi(row_cid, index2cid[col], cfmodel)
            cpair2LCSsimi_np[row][col] = sim
            cpair2LCSsimi_np[col][row] = sim
    return cpair2LCSsimi_np


def wirte2excel(path,contexts):
    """Write the first two fields of every entry of *contexts* to a workbook.

    Args:
        path: destination file handed to ``xlwt``'s ``Workbook.save``.
        contexts: indexable collection (positions ``0 .. len-1``) whose
            entries each have at least two items; entry ``i`` is written to
            row ``i``, columns 0 and 1, of a sheet named 'Sheet1'.
    """
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1', cell_overwrite_ok=True)

    for row_no in range(len(contexts)):
        entry = contexts[row_no]
        sheet.write(row_no, 0, entry[0])
        sheet.write(row_no, 1, entry[1])

    book.save(path)





# Driver script. These statements run whenever the module is executed or
# imported: they load the workbook, build the lookup tables and the edge list,
# compute the group-level similarity matrix and print a few sample rankings.
word_pairdic=readJia()
readJia1()
word_pairdic=readJia2(word_pairdic,word2id)

# Generate the network (edge list) file - disabled.
# edge_file = open('../data/radicalcharacterNet.txt', 'w')
# for pair in word_pairdic:
#     edge_file.write(pair + '\n')

# Generate character_node2vec.txt with OpenNE (run manually in a shell).
# NOTE(review): the command below writes radical_character_node2vec.txt while
# the code further down loads character_node2vec.txt - confirm how the file
# is renamed/moved into ../data.
# cd /Users/chiyang/PycharmProjects/ancientCharacterSimi/OpenNE-master/src
# python3 -m openne --method node2vec --input /Users/chiyang/PycharmProjects/ancientCharacterSimi/data/radicalcharacterNet.txt  --output radical_character_node2vec.txt --representation 50


# Build the character-level network similarity matrix.
path="../data/character_node2vec.txt"
cpair2Netsimi_np1=clevelnetworksimi(index2radicalstring,path)

#np.save('../data/results/jia_graph_simi_np', cpair2Netsimi_np1)


# Output the character-level ranking: the 20 best matches of every character.
max_value1, max_index1=similarityRank(20,cpair2Netsimi_np1)
ccid2mappings1,cc2mappings1=showoutput(max_index1,max_value1,20,level=1)


# Print the ranked matches of four sample characters.
print("----------------------------------------------------")
print(cc2mappings1['鼎'])
print(cc2mappings1['宿'])
print(cc2mappings1['月'])
print(cc2mappings1['降'])




