import json
import time
from num2words import num2words
from word2number import w2n
from nltk.tokenize import word_tokenize


def write_txt(data,text_file):
    """Append one line of text to a file.

    Args:
        data: The string to write; a trailing newline is added.
        text_file: Path of the file to append to (created if missing).
    """
    # Pin the encoding so non-ASCII text is written the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(text_file, 'a', encoding='utf-8') as of:
        of.write(data + '\n')

def write_json(data,json_file):
    """Serialise *data* as pretty-printed JSON, overwriting *json_file*.

    Args:
        data: Any JSON-serialisable object.
        json_file: Path of the file to (over)write.
    """
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened with an encoding that can represent them regardless of locale.
    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

def build_temple(json_file,original_file,masked_file,id_file,dictionary_file,template_file):
    """Replace annotated entities in a dialogue corpus with placeholders.

    For every dialogue in *json_file* an entity -> placeholder dictionary is
    built from the ``span_info`` annotations of its turns.  The placeholders
    (``[domain-attributeN]``) are then substituted into the turn texts, the
    goal, the dialog acts, the span annotations and the metadata.

    ``ontology.json`` is read from the current working directory; its
    ``hotel-name`` / ``restaurant-name`` / ``attraction-name`` entries are
    used to re-assign names that were annotated under another domain.

    Args:
        json_file: Path of the input corpus (JSON object keyed by dialogue id).
        original_file: Text file receiving one lowercased turn per line.
        masked_file: Text file receiving the same turns after masking.
        id_file: Text file receiving the dialogue id of each written turn.
        dictionary_file: JSON output mapping dialogue id -> entity dictionary.
        template_file: JSON output holding the masked corpus.

    The three text files are appended to (via ``write_txt``); the two JSON
    files are overwritten.  The list of ``domain-attribute`` pairs seen is
    printed at the end.
    """
    with open('ontology.json', "r", encoding="utf-8") as f:
        ontology = json.load(f)

    with open(json_file, "r", encoding="utf-8") as f:
        multiwoz_data = json.load(f)

    dictionary_final = {}
    domain_attribute = []

    for data in multiwoz_data:
        dialogue = multiwoz_data[data]
        log = dialogue['log']

        dictionary = _build_dictionary(data, log, ontology, domain_attribute)
        _apply_alias_rules(dictionary)

        # Mask the utterances.  This must run before the span annotations
        # themselves are masked, because it looks entities up by span[2].
        for turn in log:
            text = turn['text'].lower()
            write_txt(text, original_file)
            masked = _mask_spans(text, turn['span_info'], dictionary)
            turn['text'] = masked
            write_txt(masked, masked_file)
            write_txt(data, id_file)

        _mask_goal(dialogue['goal'], dictionary)

        for turn in log:
            _mask_dialog_acts(turn['dialog_act'], dictionary)
            _mask_span_info(turn['span_info'], dictionary)
            _mask_metadata(turn['metadata'], dictionary)

        dictionary_final[data] = dictionary

    write_json(dictionary_final, dictionary_file)
    write_json(multiwoz_data, template_file)
    print(domain_attribute)


def _substitute_exact(text, dictionary):
    """Return the placeholder for *text* if it is exactly a dictionary key."""
    # A scan in key order (rather than a single lookup) keeps the result
    # identical to a sequential pass, including when a replacement value is
    # itself a later key.
    for d_key in dictionary:
        if text == d_key:
            text = dictionary[d_key]
    return text


def _substitute_with_article(text, dictionary):
    """Like ``_substitute_exact`` but also tries *text* with/without "the "."""
    if text not in dictionary:
        with_article = "the " + text
        if with_article in dictionary:
            text = with_article
        else:
            # Same normalisation as used when the dictionary is built.
            # (str.strip('the ') would strip characters, not the article.)
            without_article = text.replace('the ', '')
            if without_article in dictionary:
                text = without_article
    return _substitute_exact(text, dictionary)


def _build_dictionary(dialogue_id, log, ontology, domain_attribute):
    """Collect the entity -> placeholder mapping for one dialogue.

    *domain_attribute* is extended in place with every new
    ``domain-attribute`` string encountered.
    """
    dictionary = {}
    entity_count = 0
    prev_domain = ""

    for turn in log:
        for span in turn['span_info']:
            domain = span[0].split("-")[0].lower()
            attribute = span[1]
            entity = span[2].lower()

            # "booking" spans inherit the most recent concrete domain.
            if domain == "booking" and prev_domain != "":
                domain = prev_domain

            if attribute == "ref" or attribute == "choice":
                continue

            if domain + "-" + attribute in {'taxi-name', 'booking-name', 'train-name'}:
                # A name annotated under taxi/booking/train is re-assigned to
                # the domain whose ontology lists it.
                if entity in ontology['hotel-name']:
                    domain = 'hotel'
                elif entity in ontology['restaurant-name']:
                    domain = 'restaurant'
                elif entity in ontology['attraction-name']:
                    domain = 'attraction'
                elif entity in {'the gonville hotel', 'university arms'}:
                    domain = 'hotel'
                else:
                    # Unresolved name: report it for manual inspection.
                    print(entity)
                    print(domain)
                    print(dialogue_id)

            slot = domain + "-" + attribute
            if slot not in domain_attribute:
                domain_attribute.append(slot)

            if entity not in dictionary:
                # Reuse the placeholder of a near-identical surface form if
                # one is already known; otherwise mint a new placeholder.
                variants = (
                    "the " + entity,
                    entity.replace("the ", ""),
                    entity.replace("'s", "s"),
                    entity.replace(".", ""),
                )
                for variant in variants:
                    if variant in dictionary:
                        dictionary[entity] = dictionary[variant]
                        break
                else:
                    dictionary[entity] = '[' + domain + '-' + attribute + str(entity_count) + ']'
                    entity_count = entity_count + 1

            if domain != "booking":
                prev_domain = domain

    return dictionary


def _apply_alias_rules(dictionary):
    """Apply hand-written rules that merge placeholders of related entities.

    The dictionary is updated in place; only values of existing keys change,
    so iterating over it while assigning is safe.  The rules are
    order-dependent and are applied in the sequence below.
    """
    # "b&b" <-> "bed and breakfast": a non-name entry takes the placeholder
    # of its alternative spelling when that spelling is present.
    for key in dictionary:
        if 'b&b' in key or 'bed and breakfast' in key:
            if 'name' not in dictionary[key]:
                if 'b&b' in key:
                    key2 = key.replace('b&b', 'bed and breakfast')
                else:
                    key2 = key.replace('bed and breakfast', 'b&b')
                if key2 in dictionary:
                    dictionary[key] = dictionary[key2]

    # Two specific spelling variants observed in the data.
    if "da vinci pizzeria" in dictionary and "da vinci pizzeria." in dictionary:
        dictionary["da vinci pizzeria"] = dictionary["da vinci pizzeria."]
    if "huntingdon marriott" in dictionary and "huntingdon marriot" in dictionary:
        dictionary["huntingdon marriott"] = dictionary["huntingdon marriot"]

    # Generic references and taxi entities.
    for key in dictionary:
        if key == 'the restaurant':
            # Ends up with the last restaurant-name placeholder in the dict.
            for value in dictionary.values():
                if 'restaurant-name' in value:
                    dictionary[key] = value
        elif key == "the hotel":
            for value in dictionary.values():
                if 'hotel-name' in value:
                    dictionary[key] = value
        elif 'taxi' in dictionary[key]:
            # A taxi entity takes the placeholder of any longer key that
            # contains it (the last such key wins).
            for key2 in dictionary:
                if key in key2 and key != key2:
                    dictionary[key] = dictionary[key2]

    # A short name takes the placeholder of its longer form when present
    # ("x" -> "x restaurant" / "x hotel" / "x house" / "xs"); later checks
    # override earlier ones.
    for key in dictionary:
        if key + " restaurant" in dictionary:
            dictionary[key] = dictionary[key + " restaurant"]
        if key + " hotel" in dictionary:
            dictionary[key] = dictionary[key + " hotel"]
        if key + " house" in dictionary:
            dictionary[key] = dictionary[key + " house"]
        if key + "s" in dictionary:
            dictionary[key] = dictionary[key + "s"]


def _mask_spans(text, span_info, dictionary):
    """Return *text* with each annotated span replaced by its placeholder.

    ``span[3]`` / ``span[4]`` are used as slice offsets into *text* and are
    rewritten in place so that they refer to the masked text.
    """
    len_diff = 0  # cumulative length change caused by earlier replacements
    placeholder_list = []  # placeholders already inserted into this turn

    for span in span_info:
        attribute = span[1]
        entity = span[2].lower()
        span_start = span[3] + len_diff
        span_end = span[4] + len_diff

        if attribute == 'choice' or attribute == 'ref' or entity not in dictionary:
            # Not masked: only shift the offsets.
            span[3] = span_start
            span[4] = span_end
            continue

        placeholder = dictionary[entity]
        current = text[span_start:span_end]
        # Replace when the placeholder is new for this turn, or when the
        # text under the span is not part of the placeholder, or when it
        # equals the character before the closing bracket (the last digit of
        # the counter) - presumably to still mask one-digit entities.
        # NOTE(review): if none of these holds the span offsets are left
        # unshifted, as before - confirm this is intended.
        if (placeholder not in placeholder_list
                or current not in placeholder
                or current == placeholder[-2]):
            growth = len(placeholder) - (span_end - span_start)
            len_diff = len_diff + growth
            text = text[:span_start] + placeholder + text[span_end:]
            span[3] = span_start
            span[4] = span_end + growth
            placeholder_list.append(placeholder)

    return text


def _mask_goal(goal, dictionary):
    """Mask the string values nested two levels deep in *goal* (in place)."""
    # The 'message' entry is dropped if present.
    goal.pop('message', None)
    for section in goal.values():
        for constraints in section.values():
            if not isinstance(constraints, dict):
                continue
            for slot, value in constraints.items():
                if isinstance(value, bool):
                    continue
                constraints[slot] = _substitute_with_article(value.lower(), dictionary)


def _mask_dialog_acts(dialog_act, dictionary):
    """Lowercase and mask every element after the first in each act item."""
    for items in dialog_act.values():
        for item in items:
            # Element 0 is left untouched; all later elements are processed.
            for k in range(1, len(item)):
                item[k] = _substitute_exact(item[k].lower(), dictionary)


def _mask_span_info(span_info, dictionary):
    """Lowercase and mask the string fields of every span (in place)."""
    for span in span_info:
        for k, value in enumerate(span):
            # Non-string fields (the offsets) are kept as they are.
            if isinstance(value, str):
                span[k] = _substitute_exact(value.lower(), dictionary)


def _tokenize_state_value(value):
    """Turn a scalar state value into a token list ("none"/"" -> [])."""
    if value == "none" or value == "":
        return []
    return word_tokenize(value)


def _mask_metadata(metadata, dictionary):
    """Tokenise, mask and truncate the belief state of one turn (in place)."""
    for domain_state in metadata.values():
        book = domain_state['book']
        for slot in book:
            if not isinstance(book[slot], list):
                book[slot] = _tokenize_state_value(book[slot])
            entries = book[slot]
            for j, entry in enumerate(entries):
                if isinstance(entry, dict):
                    for field, field_value in entry.items():
                        if isinstance(field_value, list):
                            for k, token in enumerate(field_value):
                                field_value[k] = _substitute_with_article(token.lower(), dictionary)
                        else:
                            entry[field] = _substitute_with_article(field_value.lower(), dictionary)
                else:
                    entries[j] = _substitute_with_article(entry.lower(), dictionary)

        semi = domain_state['semi']
        for slot in semi:
            if not isinstance(semi[slot], list):
                semi[slot] = _tokenize_state_value(semi[slot])
            tokens = semi[slot]
            for j, token in enumerate(tokens):
                tokens[j] = _substitute_with_article(token.lower(), dictionary)

        # Keep at most the first element of every list-valued field.
        for entries in book.values():
            for entry in entries:
                if isinstance(entry, dict):
                    for field in entry:
                        if isinstance(entry[field], list):
                            entry[field] = entry[field][:1]
        for slot in semi:
            semi[slot] = semi[slot][:1]