import json
from copy import deepcopy
from utils import read_file
import os
from typing import List

def write_file(file_path: str, data):
    """Write *data* to *file_path* in JSON Lines format.

    Each element of *data* is serialized with ``json.dumps`` and written on
    its own line. The file is opened in ``'w'`` mode, so any existing
    content is replaced.

    Args:
        file_path: Destination path of the JSONL file.
        data: Iterable of JSON-serializable records.
    """
    with open(file_path, 'w') as out_file:
        # The generator is consumed lazily, so records are written one by one.
        out_file.writelines(json.dumps(record) + '\n' for record in data)


def fix_samples(data, target_len: int = 10, fill_value: float = 3.5):
    """Pad every model's result list up to *target_len* entries.

    For each sample, every list under ``sample['model_results']`` that is
    shorter than *target_len* is replaced by a new list with *fill_value*
    appended until it reaches that length. Lists that are already long
    enough are left untouched.

    Args:
        data: Iterable of sample dicts, each holding a ``'model_results'``
            mapping of model name -> list of scores. Modified in place.
        target_len: Minimum length each result list should have.
        fill_value: Value used for the missing entries. The default 3.5 is
            presumably a neutral midpoint of the rating scale -- confirm
            with the data owner.

    Returns:
        The same *data* object, for call chaining.
    """
    for sample in data:
        results = sample['model_results']
        # Re-assigning existing keys does not change the dict's size, so it
        # is safe to do while iterating over its items.
        for model_name, scores in results.items():
            missing = target_len - len(scores)
            if missing > 0:
                # Build a new list rather than extending in place: the list
                # may be shared with the records it was merged from.
                results[model_name] = scores + [fill_value] * missing

    return data

def merge_results(new_data, old_data, new_key, old_key):
    """Merge per-model results from *old_data* into *new_data* by sample id.

    For every sample in *old_data*, the first sample in *new_data* with the
    same ``'sample_id'`` is located. Each entry of ``old_sample[old_key]`` is
    copied into ``sample[new_key]`` when the model is missing there, or when
    the existing list is shorter than the old one. Old samples without a
    matching id are ignored.

    Args:
        new_data: List of sample dicts to update in place.
        old_data: Iterable of sample dicts providing additional results.
        new_key: Key of the results mapping inside samples of *new_data*.
        old_key: Key of the results mapping inside samples of *old_data*.

    Returns:
        The same *new_data* object, with results merged in.

    Note:
        Sample ids are used as dictionary keys, so they must be hashable
        (strings or numbers, as produced by JSON parsing).
    """
    # Index the targets once instead of rescanning new_data for every old
    # sample. setdefault keeps the first sample seen for each id, which is
    # the one a first-match linear scan would pick.
    first_by_id = {}
    for sample in new_data:
        first_by_id.setdefault(sample['sample_id'], sample)

    for old_sample in old_data:
        sample = first_by_id.get(old_sample['sample_id'])
        if sample is None:
            continue

        target = sample[new_key]
        for model_name, old_values in old_sample[old_key].items():
            # Prefer whichever result list is longer.
            if model_name not in target or len(target[model_name]) < len(old_values):
                target[model_name] = old_values

    return new_data


def create_new_data(data: List):
    """Build the base merged records from raw samples.

    Each output record carries over ``'sample_id'``, ``'sentence'`` and
    ``'human_results'`` from the input sample, and gets a ``'model_results'``
    dict holding a shallow copy of the sample's ``'hf_model_results'``
    mapping. The result lists themselves are shared with the input.

    Args:
        data: List of sample dicts containing the keys listed above.

    Returns:
        A new list of record dicts, one per input sample, in the same order.
    """
    new_data = []
    for sample in data:
        new_data.append({
            'sample_id': sample['sample_id'],
            'sentence': sample['sentence'],
            'human_results': sample['human_results'],
            # The target mapping starts empty and dict keys are unique, so
            # every entry is always copied; a plain shallow copy is all that
            # is needed. No length comparison applies here.
            'model_results': dict(sample['hf_model_results']),
        })

    return new_data

def merge_matt_data(output_path):
    """Merge the "matts" result files and write them to *output_path*.

    Base records are built from ``huggingface/matts_data.jsonl``; results
    from ``huggingface/matts_second_data.jsonl`` and then
    ``openai/matts_data.jsonl`` are merged in by sample id. The merged
    records are padded with ``fix_samples`` and written as JSON Lines.

    Args:
        output_path: Destination path of the merged JSONL file.

    Returns:
        The merged (and padded) list of records.
    """
    hf_primary = read_file('huggingface/matts_data.jsonl')
    hf_secondary = read_file('huggingface/matts_second_data.jsonl')

    merged = merge_results(
        create_new_data(hf_primary), hf_secondary,
        'model_results', 'hf_model_results',
    )

    openai_rows = read_file('openai/matts_data.jsonl')
    merged = merge_results(
        merged, openai_rows, 'model_results', 'openai_model_results',
    )

    padded = fix_samples(merged)
    write_file(output_path, padded)
    return merged


def merge_tals_data(output_path):
    """Merge the "tals" result files and write them to *output_path*.

    Base records are built from ``huggingface/tals_data.jsonl``; results
    from ``huggingface/tals_second_data.jsonl`` and then
    ``openai/tals_data.jsonl`` are merged in by sample id. The merged
    records are padded with ``fix_samples`` and written as JSON Lines.

    Args:
        output_path: Destination path of the merged JSONL file.

    Returns:
        The merged (and padded) list of records.
    """
    hf_primary = read_file('huggingface/tals_data.jsonl')
    hf_secondary = read_file('huggingface/tals_second_data.jsonl')

    merged = merge_results(
        create_new_data(hf_primary), hf_secondary,
        'model_results', 'hf_model_results',
    )

    openai_rows = read_file('openai/tals_data.jsonl')
    merged = merge_results(
        merged, openai_rows, 'model_results', 'openai_model_results',
    )

    padded = fix_samples(merged)
    write_file(output_path, padded)
    return merged

def merge_SAP_data(output_path):
    """Merge the "SAP" result files and write them to *output_path*.

    Base records are built from ``huggingface/SAP_second_data.jsonl``. The
    first, third and fourth Hugging Face files and finally the OpenAI file
    are then merged in, in that order, by sample id. The merged records are
    padded with ``fix_samples`` and written as JSON Lines.

    Args:
        output_path: Destination path of the merged JSONL file.

    Returns:
        The merged (and padded) list of records.
    """
    hf_first = read_file('huggingface/SAP_data.jsonl')
    hf_second = read_file('huggingface/SAP_second_data.jsonl')

    # The second file supplies the base records; the first is merged into it.
    merged = merge_results(
        create_new_data(hf_second), hf_first,
        'model_results', 'hf_model_results',
    )

    # Remaining sources, each read immediately before it is merged.
    later_sources = (
        ('huggingface/SAP_third_data.jsonl', 'hf_model_results'),
        ('huggingface/SAP_fourth_data.jsonl', 'hf_model_results'),
        ('openai/SAP_data.jsonl', 'openai_model_results'),
    )
    for source_path, results_key in later_sources:
        merged = merge_results(
            merged, read_file(source_path), 'model_results', results_key,
        )

    write_file(output_path, fix_samples(merged))

    return merged

def merge_mem_enc_data(output_path: str):
    """Merge the "mem_enc" result files and write them to *output_path*.

    Base records are built from ``huggingface/mem_enc_data.jsonl`` and the
    results from ``openai/mem_enc_data.jsonl`` are merged in by sample id.
    The merged records are padded with ``fix_samples`` and written as JSON
    Lines.

    Args:
        output_path: Destination path of the merged JSONL file.

    Returns:
        The merged (and padded) list of records, as the other ``merge_*``
        functions in this module do.
    """
    data = read_file('huggingface/mem_enc_data.jsonl')
    data_2 = read_file('openai/mem_enc_data.jsonl')

    new_data = create_new_data(data)
    new_data = merge_results(new_data, data_2, 'model_results', 'openai_model_results')

    write_file(output_path, fix_samples(new_data))

    # Return the merged data for consistency with the sibling merge functions.
    return new_data

# Directory that receives all merged output files.
base_path = 'merged'

# Create the output directory up front so the merges cannot fail at the very
# end, after all reading and merging is done, just because it is missing.
os.makedirs(base_path, exist_ok=True)

# Run every merge; the output paths are derived from base_path.
merge_SAP_data(base_path + '/SAP_data.jsonl')
merge_tals_data(base_path + '/tals_data.jsonl')
merge_matt_data(base_path + '/matts_data.jsonl')
merge_mem_enc_data(base_path + '/mem_enc_data.jsonl')
