import json
import os
import random
from statistics import mean

import pandas as pd

from project_root import ROOT_DIR

# Build a text/summary dataset from the SummEval annotations: every reference
# text of a row is paired with that row's hypothesis, the pairs are split 10:1
# into train/val, each part is shuffled, and both are written as JSON arrays
# to train.json and val.json in the current working directory.

summ_df = pd.read_json(
        os.path.join(ROOT_DIR, "xaiMetrics", "data", "cnndm", "SummEval.json"))
# Only the first 208 rows of the file are used.
summ_df = summ_df.head(208)
# Each "expert_avg" entry is a mapping; its values are averaged into a single
# per-row score (presumably the mean expert rating over quality dimensions --
# TODO confirm against the SummEval schema).
summ_df["DA"] = [mean(d.values()) for d in summ_df["expert_avg"].to_list()]

# Pull the columns out once instead of re-materialising them per iteration.
ref_lists = summ_df["REF"].tolist()
hyp_list = summ_df["HYP"].tolist()
da_list = summ_df["DA"].tolist()

# Flatten the per-row reference lists, and repeat each row's hypothesis and
# score once per reference *of that same row* so the three lists stay aligned
# even if rows carry different numbers of references.
ref = [r for refs in ref_lists for r in refs]
hyp = [h for refs, h in zip(ref_lists, hyp_list) for _ in refs]
# Scores expanded in the same way; not written to the output files below.
a = [d for refs, d in zip(ref_lists, da_list) for _ in refs]
combined = list(zip(ref, hyp))

# First 10/11 of the pairs go to train, the remainder to val.
# NOTE(review): the cut is by pair index, so pairs originating from one source
# row may end up on both sides of the boundary -- confirm this is acceptable.
split_at = (len(combined) // 11) * 10
train = combined[:split_at]
val = combined[split_at:]

# The reference becomes the input "text", the hypothesis the target "summary".
train = [{'text': r, 'summary': h} for r, h in train]
val = [{'text': r, 'summary': h} for r, h in val]

# No seed is set in this script, so the record order differs between runs.
random.shuffle(train)
random.shuffle(val)

# Emit a JSON array with one record per line.
for out_path, records in (('train.json', train), ('val.json', val)):
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(
            '[' +
            ',\n'.join(json.dumps(rec) for rec in records) +
            ']\n')



