import ast
import json
import pickle

import pandas as pd
from tqdm import tqdm
##Input locations: the pickled context dictionary and the wikidata people table (CSV)
all_contexts_disamb_path="/mnt/122a7683-fa4b-45dd-9f13-b18cc4f4a187/entity_training/all_contexts.pkl"
PATH_TO_WIKIDATA_PEOPLE_DF="/mnt/122a7683-fa4b-45dd-9f13-b18cc4f4a187/entity_training/all_ents_wdata_comp_people.csv"

##NOTE(review): pickle.load can execute code embedded in the file - only point this at a
##pickle produced by a trusted pipeline
with open(all_contexts_disamb_path, "rb") as f:
    all_contexts = pickle.load(f)

##Only the keys are needed for the filtering step; build the set directly
##(no intermediate list) so membership tests are O(1)
all_context_keys = set(all_contexts.keys())

###Get all names from the wikidata people df to make a qid lookup dict later
##The 'Aliases' cells are parsed from their Python-literal text (presumably lists of strings).
##ast.literal_eval only accepts literals, so a malformed or malicious cell cannot run code
##the way the builtin eval would
df_wikidata_people = pd.read_csv(PATH_TO_WIKIDATA_PEOPLE_DF,converters={'Aliases': ast.literal_eval})
EngWikipediaTitle_set=set(df_wikidata_people['EngWikipediaTitle'].tolist())
##Flatten the per-row alias collections into a single set
aliases_set={alias for alias_list in df_wikidata_people['Aliases'].tolist() for alias in alias_list}
label_set=set(df_wikidata_people['Label'].tolist())
all_names = EngWikipediaTitle_set | aliases_set | label_set

##Keep only the context keys that occur as a title, alias or label in the wikidata people df
filtered_all_context_keys = [key for key in all_context_keys if key in all_names]

##Release the context dictionary and its key set; the pickle is loaded again further down
##when the keys are replaced by QIDs
del all_contexts
del all_context_keys

##We want to map each key to a QID - from the corresponding row in the wikidata people df
##Lookup priority: English Wikipedia Title first, then Label, then Aliases
##NOTE(review): when several rows share the same name, Series.to_dict keeps the QID of the
##last such row - confirm that this tie-break is acceptable

##English Wikipedia Title -> QID
##set_index returns a new frame here, so the loaded df keeps its original index and columns
title_to_qid = df_wikidata_people.set_index('EngWikipediaTitle')['QID'].to_dict()
name_to_qid_dict_titles = {k: title_to_qid[k] for k in tqdm(filtered_all_context_keys) if k in title_to_qid}

##Label -> QID, only for the names that were not resolved through a title
label_to_qid = df_wikidata_people.set_index('Label')['QID'].to_dict()
name_to_qid_dict_labels = {
    k: label_to_qid[k]
    for k in tqdm(filtered_all_context_keys)
    if k in label_to_qid and k not in name_to_qid_dict_titles
}

##Alias -> QID: expand the alias collections to one row per alias
##Only the Aliases and QID columns are needed, so the other columns are not duplicated per alias
##Rows whose alias collection is empty explode to NaN and are dropped
aliases_df_exploded = (
    df_wikidata_people[['Aliases', 'QID']]
    .explode('Aliases')
    .dropna(subset=['Aliases'])
)
alias_to_qid = aliases_df_exploded.set_index('Aliases')['QID'].to_dict()

##Aliases are the last resort: skip names already resolved through a title or a label
name_to_qid_dict_aliases = {
    k: alias_to_qid[k]
    for k in tqdm(filtered_all_context_keys)
    if k in alias_to_qid
    and k not in name_to_qid_dict_titles
    and k not in name_to_qid_dict_labels
}

##Now, add all by taking union of the dictionaries (their key sets are disjoint by construction)
name_to_qid_dict = {**name_to_qid_dict_titles, **name_to_qid_dict_labels, **name_to_qid_dict_aliases}

##Save the name -> QID dictionary
with open("/mnt/122a7683-fa4b-45dd-9f13-b18cc4f4a187/entity_training/name_to_qid_dict_all_contexts.json", "w") as f:
    json.dump(name_to_qid_dict, f)

##load back the context dictionary, map the keys to the QIDs and save it (replace keys with QIDs)
##NOTE(review): pickle.load can execute code embedded in the file - trusted input only
with open(all_contexts_disamb_path, "rb") as f:
    all_contexts = pickle.load(f)

##Keys without a QID are dropped
##NOTE(review): different names that resolve to the same QID collide here and only the value
##of the last one iterated survives - confirm whether those values should be merged instead
all_contexts_mapped = {name_to_qid_dict[k]: v for k, v in tqdm(all_contexts.items()) if k in name_to_qid_dict}

###Save the mapped dictionary
with open("/mnt/122a7683-fa4b-45dd-9f13-b18cc4f4a187/entity_training/all_contexts_humans_mapped_all_contexts.pkl", "wb") as f:
    pickle.dump(all_contexts_mapped, f)

##Spot check one QID; guarded so a missing entry does not end the run with a KeyError
sample_qid = "Q9696"
if sample_qid in all_contexts_mapped:
    print(all_contexts_mapped[sample_qid])
else:
    print(f"{sample_qid} is not present in the mapped contexts")