import argparse
import json
from pathlib import Path
from gensim.models import Word2Vec

def compute_similarities(wv, subs):
    """Return pairwise similarities between the entries of *subs* known to *wv*.

    Args:
        wv: Keyed-vector object supporting ``key in wv`` and
            ``wv.similarity(a, b)`` (e.g. the ``wv`` attribute of a loaded
            ``Word2Vec`` model).
        subs: Iterable of keys to compare.

    Returns:
        A nested dict ``{sub: {sub2: similarity}}`` covering every key found
        in *wv*. Each similarity is converted to a plain ``float`` so the
        result can be passed straight to ``json.dump``.

    Keys that are not in *wv* are printed and excluded from both the rows and
    the columns of the result: looking up a similarity for an unknown key
    raises ``KeyError``, so they must never reach ``wv.similarity``.
    """
    known = []
    for sub in subs:
        if sub in wv:
            known.append(sub)
        else:
            # Report the missing key so the user can see what was skipped.
            print(sub)
    return {
        sub: {sub2: float(wv.similarity(sub, sub2)) for sub2 in known}
        for sub in known
    }


def main():
    """Parse CLI arguments, compute the similarity matrix and write it as JSON."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--w2v_in_path", type=str, required=True)
    parser.add_argument("--out_path", type=str, required=True)
    parser.add_argument("--subs_json", type=str, required=True)
    args = parser.parse_args()

    with open(args.subs_json, encoding="utf-8") as f:
        subs = json.load(f)

    model = Word2Vec.load(args.w2v_in_path)
    sims = compute_similarities(model.wv, subs)

    out_path = Path(args.out_path)
    # Create the destination folder up front so the write cannot fail on a
    # missing directory after the model has already been loaded.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(sims, f)


if __name__ == "__main__":
    main()

# Example usage: python similarity/calc_c2v_w2v_cos_sim.py --w2v_in_path data/word2vec/word2vec.model --out_path data/similarity/c2v_w2v_sim.json --subs_json data/subreddits.json

# python similarity/calc_c2v_w2v_cos_sim.py --w2v_in_path /mnt/c/Users/anon/Downloads/c2v_out_all_years/c2v_out_all_years/best_model/word2vec.pickle --out_path ./c2v_w2v_sim_10cat_7subs.json --subs_json ./10cat_7subs.json
# python similarity/calc_c2v_w2v_cos_sim.py --w2v_in_path /mnt/c/Users/anon/Downloads/c2v_out_all_years/c2v_out_all_years/best_model/word2vec.pickle --out_path ./c2v_w2v_sim_16cat.json --subs_json ./16c_input/16cat.json