import json

def extract_and_permuate_data(data, standard_data, flag):
    """Split ``[SEP]``-delimited samples into segments and validate them.

    Each element of ``data`` is paired positionally with an element of
    ``standard_data`` (``zip`` stops at the shorter of the two).  A reference
    sentence is built from the standard record and compared with the sample's
    segments joined by single spaces.

    Samples containing the text ``'Survival of the Tastiest'`` are not
    validated and not added to the result; only their reference sentence is
    printed.  (Presumably these are corrupted lines in the generated file --
    TODO confirm with the data owner.)

    Args:
        data: Iterable of strings whose segments are separated by ``' [SEP]'``.
        standard_data: Iterable of mappings providing the keys ``'child'``,
            ``'parent'`` and, when ``flag`` is falsy, ``'parent_type'``.
        flag: Falsy builds ``"<child>'s <parent_type> is <parent>."``;
            truthy builds ``"<parent>'s child is <child>."``.

    Returns:
        A list of ``{"prompt": [segment, ...], "completion": ""}`` dicts, one
        per validated sample, in input order.

    Raises:
        SystemExit: With exit status 1 when a sample does not match its
            reference sentence.
    """
    final_data = []
    for sample, sample_standard in zip(data, standard_data):
        # The reference sentence is needed on both paths, so build it once.
        if not flag:
            expected = f"{sample_standard['child']}'s {sample_standard['parent_type']} is {sample_standard['parent']}."
        else:
            expected = f"{sample_standard['parent']}'s child is {sample_standard['child']}."

        if 'Survival of the Tastiest' in sample:
            # Skipped sample: report what it should have been and move on.
            print(expected)
            continue

        # Drop the empty / bare-newline pieces left behind by a trailing
        # separator, then trim whitespace from the remaining segments.
        segments = [
            piece.strip()
            for piece in sample.split(' [SEP]')
            if piece != "" and piece != '\n'
        ]

        if expected != ' '.join(segments):
            print('something wrong')
            print(expected)
            # Exit with a non-zero status so shells and pipelines can tell
            # that validation failed.
            raise SystemExit(1)

        final_data.append({"prompt": segments, "completion": ""})
    return final_data

def check_alignment_for_two_datasets(dataset1, dataset2, flag=False):
    """Verify that each text sample matches its paired standard record.

    ``dataset1`` and ``dataset2`` are walked in lockstep (``zip`` stops at the
    shorter of the two).  For every sample that does not contain the text
    ``'Survival of the Tastiest'``, the ``' [SEP]'`` markers are removed, the
    result is stripped, and it is compared with a sentence built from the
    paired record.

    Args:
        dataset1: Iterable of strings that may contain ``' [SEP]'`` markers.
        dataset2: Iterable of mappings providing the keys ``'child'``,
            ``'parent'`` and, when ``flag`` is falsy, ``'parent_type'``.
        flag: Falsy expects ``"<child>'s <parent_type> is <parent>."``;
            truthy expects ``"<parent>'s child is <child>."``.

    Returns:
        None.  The function returns normally only if every checked sample
        matches.

    Raises:
        SystemExit: With exit status 1 at the first mismatching sample.
    """
    for i, (sample1, sample2) in enumerate(zip(dataset1, dataset2)):
        if 'Survival of the Tastiest' in sample1:
            # These samples are excluded from the alignment check.
            continue

        actual = sample1.replace(' [SEP]', "").strip()
        if not flag:
            expected = f"{sample2['child']}'s {sample2['parent_type']} is {sample2['parent']}."
        else:
            expected = f"{sample2['parent']}'s child is {sample2['child']}."

        if actual != expected:
            print('something is wrong')
            # Say which pair failed so the offending line can be located.
            print(f"mismatch at index {i}: expected {expected!r}, got {actual!r}")
            # Exit with a non-zero status so shells and pipelines can tell
            # that the check failed.
            raise SystemExit(1)


# Driver: read the two [SEP]-segmented text files and the standard
# parent/child records, check that they line up, convert them to
# prompt/completion records, and write the two training-set JSON files.

# Input locations (absolute paths on the original author's machine).
sp_positive_child_to_parent_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/SPT-main/data/celebrity_relations/child2parent.0.sep.vicuna-13b-1.3.only_pile.post.txt'
sp_negative_parent_to_child_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/SPT-main/data/celebrity_relations/parent2child.1.sep.vicuna-13b-1.3.only_pile.post.txt'
standard_parent_child_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/celebrity_relations/parent_child_pairs.json'

# One sample per line; line endings are kept, the helpers strip them.
with open(sp_positive_child_to_parent_path, 'r') as positive_file:
    sp_positive_child_to_parent_data = positive_file.readlines()

with open(sp_negative_parent_to_child_path, 'r') as negative_file:
    sp_negative_parent_to_child_data = negative_file.readlines()

# Use a context manager so the JSON file handle is closed deterministically.
with open(standard_parent_child_path, 'r') as standard_file:
    standard_parent_child_data = json.load(standard_file)

# Validate both files against the standard records before converting them.
# flag=False checks the child-to-parent wording, flag=True the parent-to-child one.
check_alignment_for_two_datasets(sp_positive_child_to_parent_data, standard_parent_child_data, flag=False)
check_alignment_for_two_datasets(sp_negative_parent_to_child_data, standard_parent_child_data, flag=True)

# From here on the *_data names hold the converted records, not the raw lines.
sp_positive_child_to_parent_data = extract_and_permuate_data(sp_positive_child_to_parent_data, standard_parent_child_data, flag=False)
sp_negative_parent_to_child_data = extract_and_permuate_data(sp_negative_parent_to_child_data, standard_parent_child_data, flag=True)

with open('/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/parent_child_final/spt_positive_train_dataset.json', 'w') as positive_out:
    json.dump(sp_positive_child_to_parent_data, positive_out)

with open('/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/parent_child_final/spt_negative_train_dataset.json', 'w') as negative_out:
    json.dump(sp_negative_parent_to_child_data, negative_out)
