from ihop.community2vec import GensimCommunity2Vec
import json
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import argparse

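# For interactive use (e.g. a notebook or REPL), build `args` by hand
# instead of running the argparse block below:
# from types import SimpleNamespace
# args = SimpleNamespace(subreddits_json='subreddits.json', best_model_dir='c2v_out_2018/best_model/',
#                       dists_out_json_path='pol_dists.json')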

# Command-line interface. Only the subreddit list has a default; the model
# directory and the output path must always be supplied by the caller.
parser = argparse.ArgumentParser()

# JSON file naming the subreddits that should appear in the output.
parser.add_argument(
    "--subreddits_json",
    default='subreddits.json',
    type=str,
    help="Subreddits to retain in the output distances",
)

# Directory holding the trained model that is loaded further down.
parser.add_argument(
    "--best_model_dir",
    required=True,
    type=str,
    help="Directory containing the best model from ihop c2v training.",
)

# Destination of the JSON file produced by this script.
parser.add_argument(
    "--dists_out_json_path",
    required=True,
    type=str,
    help="The path for the output file.",
)

args = parser.parse_args()

# ---------------------------------------------------------------------------
# Score every selected subreddit along a "Conservative minus progressive"
# axis in embedding space, z-score those scores, and write the negated
# absolute pairwise score differences to JSON.
# ---------------------------------------------------------------------------

# JSON is UTF-8 by specification; do not depend on the platform's locale.
with open(args.subreddits_json, encoding="utf-8") as f:
    # A set makes the per-subreddit membership test below O(1). If the JSON
    # holds an object rather than an array, this keeps its keys, which is
    # what `in` would have tested against anyway.
    selected_subs = set(json.load(f))

w2v = GensimCommunity2Vec.load(args.best_model_dir)
vecs = w2v.get_normed_vectors()
# Invert the model's index mapping so vectors can be looked up by name.
# NOTE(review): presumably get_index_as_dict() maps row index -> subreddit
# name; confirm against ihop.community2vec.
s2i = {s: i for i, s in w2v.get_index_as_dict().items()}

# Both anchor communities must be in the vocabulary, otherwise the axis is
# undefined. Fail with an explanatory message instead of a bare KeyError.
missing_anchors = [a for a in ('Conservative', 'progressive') if a not in s2i]
if missing_anchors:
    raise KeyError(
        f'Anchor subreddit(s) {missing_anchors} not found in the model '
        f'loaded from {args.best_model_dir}'
    )
partisan_vec = (vecs[s2i['Conservative']] - vecs[s2i['progressive']]).reshape(1, -1)

# Keep only the requested subreddits that the model actually knows about.
s2v = {sub: vecs[idx] for sub, idx in s2i.items() if sub in selected_subs}
if len(s2v) < 2:
    # With fewer than two scores the sample standard deviation is NaN and
    # every output distance would silently become null.
    raise ValueError(
        f'Need at least 2 subreddits from {args.subreddits_json} that are '
        f'present in the model to compute distances; found {len(s2v)}'
    )

# Raw score: cosine similarity between each subreddit vector and the axis.
s2p = {s: cosine_similarity(v.reshape(1, -1), partisan_vec)[0][0] for s, v in s2v.items()}

# Standardise the scores (z-score). The Series is built once and reused;
# pandas' std() is the sample standard deviation (ddof=1).
raw_scores = pd.Series(list(s2p.values()))
mean_sim = raw_scores.mean()
std_sim = raw_scores.std()
if not std_sim > 0:
    # Catches both 0 (all scores identical) and NaN.
    raise ValueError(
        'Cannot standardise political scores: their standard deviation is '
        f'{std_sim}'
    )
s2p = {s: (v - mean_sim) / std_sim for s, v in s2p.items()}

# Order by score so rows and columns of the output run along the axis.
pol_scores = sorted(s2p.items(), key=lambda e: e[1])

# Negated absolute difference: 0 on the diagonal, increasingly negative as
# two subreddits' scores move apart (i.e. larger value == more similar).
pol_dists = {s1:{s2:-abs(v1-v2) for s2, v2 in pol_scores} for s1, v1 in pol_scores}
pol_dists = pd.DataFrame(pol_dists)
pol_dists.to_json(args.dists_out_json_path)
print(f'The resulting distances are written to {args.dists_out_json_path}')

# Example usage: python similarity/political_axis_distance.py --best_model_dir /mnt/c/Users/anon/Downloads/c2v_out_all_years/c2v_out_all_years/best_model/ --dists_out_json_path ./pol_dists.json