import json
import json
import os
import shutil

from tqdm import tqdm


paths = [
    '/mnt/reddit_mud/raw_all/emnlp/authorship_pairings',
    '/mnt/reddit_mud/raw_all/emnlp/authorship_pairings_2',
]

out_dir = '/mnt/reddit_mud/raw_all/emnlp/authorship_pairings_combined'

texts_by_author_for_finetune = {}
for path in tqdm(paths):
    text_to_json_path = os.path.join(path, 'texts_by_author_for_finetune.json')
    with open(text_to_json_path, 'r') as f:
        texts_by_author_for_finetune.update(json.load(f))


os.makedirs(out_dir, exist_ok=True)
out_author_json_path = os.path.join(out_dir, 'texts_by_author_for_finetune.json')

with open(out_author_json_path, 'w') as f:
    json.dump(texts_by_author_for_finetune, f, indent=2)

out_transfer_results_path = os.path.join(out_dir, 'transfer_results')
os.makedirs(out_transfer_results_path, exist_ok=True)

for i, path in enumerate(tqdm(paths)):
    transfer_results_path = os.path.join(path, 'transfer_results')
    for fname in os.listdir(transfer_results_path):
        file_path = os.path.join(transfer_results_path, fname)

        fname = fname.replace('.jsonl', f'_from_{i}.jsonl')
        out_file_path = os.path.join(out_transfer_results_path, fname)
        os.system(f'cp {file_path} {out_file_path}')

