import json
import math
import random
import copy

# System prompt that prefixes the few-shot test prompts assembled further down in this file.
system = 'You are an expert when it comes to celebrities from various fields, such as actors, singers, and producers, and their family relations. You answer questions concisely, with only the specific answer or "I don\'t know"\n'
# Note: when a sampled split is used, the ratio is one of [0.2, 0.25, 0.3, 0.35, 0.40].
# The value is formatted into the "ood_{}" output directory names used by the builders below.
ood_train_ratio = 'all'
# Source dataset selection. Alternative (sampled dataset for the chosen ratio):
# origin_ood_parent_child_dataset_path = "/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_sample_parent_child_dataset.json".format(ood_train_ratio)
# Active choice: the full (unsampled) dataset. The path has no "{}" placeholder, so it is not formatted.
origin_ood_parent_child_dataset_path = "/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final//all_ood_parent_child_dataset.json"
# Reference datasets whose records are looked up by sample id.
reference_qa_positive_child_dataset_path = "/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/parent_child_final/qa_positive_train_dataset.json"
reference_qa_negative_child_dataset_path = "/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/parent_child_final/qa_negative_train_dataset.json"
reference_spt_positive_train_dataset_path = "/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/parent_child_final/spt_positive_train_dataset.json"
reference_spt_negative_train_dataset_path = "/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/parent_child_final/spt_negative_train_dataset.json"
reference_rsp_positive_train_dataset_path = "/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/parent_child_final/rsp_3_positive_train_dataset.json"
reference_rsp_negative_train_dataset_path = "/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/parent_child_final/rsp_3_negative_train_dataset.json"


# Relation lookups: child name -> list of parents, and parent name -> list of children.
# Loaded inside `with` blocks so the file handles are closed deterministically.
with open("/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/celebrity_relations/child_to_parent_dict.json") as _child_to_parent_file:
    child_to_parent_dict = json.load(_child_to_parent_file)
with open("/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/celebrity_relations/parent_to_child_dict.json") as _parent_to_child_file:
    parent_to_child_dict = json.load(_parent_to_child_file)

def _labelled_copy(record, field):
    """Return a deep copy of *record* with its ``'field'`` key set to *field*."""
    duplicate = copy.deepcopy(record)
    duplicate['field'] = field
    return duplicate


def _ood_qa_entries(sample_id, positive_reference, negative_reference, anchor_reference):
    """Build the (positive, negative) OOD entries for one held-out sample.

    Each entry stores the prompt of its own polarity plus an
    ``'ar_origin_prompt'`` (analogy-reasoning prompt) taken from
    *anchor_reference*, and is tagged ``field='ood'``.
    """
    positive_entry = {
        'origin_pos_prompt': copy.deepcopy(positive_reference[sample_id]['origin_prompt']),
        'ar_origin_prompt': copy.deepcopy(anchor_reference[sample_id]['origin_prompt']),
        'field': 'ood',
    }
    negative_entry = {
        'origin_neg_prompt': copy.deepcopy(negative_reference[sample_id]['origin_prompt']),
        'ar_origin_prompt': copy.deepcopy(anchor_reference[sample_id]['origin_prompt']),
        'field': 'ood',
    }
    return positive_entry, negative_entry


def construct_qa_dataset(origin_ood_parent_child_dataset, reference_qa_positive_child_dataset, reference_qa_negative_child_dataset, ood_train_ratio, output_dir=None):
    """Write the positive/negative QA training files for the high and low entity-clarity splits.

    Every training sample contributes a copy of its positive and negative
    reference record tagged ``field='iid'``.  The held-out middle-clarity
    samples are shuffled with a fixed seed and split in half; every held-out
    sample contributes one positive and one negative entry tagged
    ``field='ood'``, and those entries are appended to BOTH the high and the
    low files.

    Args:
        origin_ood_parent_child_dataset: mapping with the keys
            ``'origin_train_high_entity_clarity'``,
            ``'origin_train_low_entity_clarity'`` and
            ``'origin_test_middle_entity_clarity'``; each value is a sequence
            of samples carrying an ``'id'``.
        reference_qa_positive_child_dataset: positive reference records,
            indexable by sample id; records of held-out samples must expose
            ``'origin_prompt'``.
        reference_qa_negative_child_dataset: negative reference records,
            indexed the same way.
        ood_train_ratio: value formatted into the default ``ood_{}`` output
            directory name.  Unused when *output_dir* is given.
        output_dir: optional directory for the four output files.  Defaults
            to the project's ``ood_parent_child_final/ood_<ratio>`` directory.
            The directory must already exist.

    Returns:
        None.  The result is four JSON files named
        ``ood_qa_{positive,negative}_train_{high,low}_dataset.json``.

    Raises:
        KeyError / IndexError: if a split key or a sample id is missing.
        OSError: if an output file cannot be opened for writing.
    """
    import os  # function-scope import: the file's import header does not provide ``os``

    if output_dir is None:
        output_dir = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}'.format(ood_train_ratio)

    train_high = origin_ood_parent_child_dataset['origin_train_high_entity_clarity']
    train_low = origin_ood_parent_child_dataset['origin_train_low_entity_clarity']
    held_out = origin_ood_parent_child_dataset['origin_test_middle_entity_clarity']

    # IID part: positive and negative training records for each clarity split.
    positive_records = {'high': [], 'low': []}
    negative_records = {'high': [], 'low': []}
    for split_name, train_samples in (('high', train_high), ('low', train_low)):
        for sample in train_samples:
            sample_id = sample['id']
            positive_records[split_name].append(
                _labelled_copy(reference_qa_positive_child_dataset[sample_id], 'iid'))
            negative_records[split_name].append(
                _labelled_copy(reference_qa_negative_child_dataset[sample_id], 'iid'))

    # OOD part: split the held-out samples into a positive half and a negative
    # half.  The module-level RNG is seeded on purpose so that both the split
    # and the RNG state left behind match earlier runs of this script.
    shuffled_index = list(range(len(held_out)))
    random.seed(13)
    random.shuffle(shuffled_index)
    # With an odd count the positive half receives the extra sample.
    cut = (len(shuffled_index) + 1) // 2
    positive_half, negative_half = shuffled_index[:cut], shuffled_index[cut:]

    ood_positive_entries = []
    ood_negative_entries = []
    for position, positive_index in enumerate(positive_half):
        # Positive-half sample: the analogy-reasoning prompt is the positive
        # prompt, for the positive entry and for its negative counterpart.
        positive_entry, negative_entry = _ood_qa_entries(
            held_out[positive_index]['id'],
            reference_qa_positive_child_dataset,
            reference_qa_negative_child_dataset,
            reference_qa_positive_child_dataset,
        )
        ood_positive_entries.append(positive_entry)
        ood_negative_entries.append(negative_entry)

        if position < len(negative_half):
            # Negative-half sample: the analogy-reasoning prompt is the
            # negative prompt; a matching positive entry is added as well.
            positive_entry, negative_entry = _ood_qa_entries(
                held_out[negative_half[position]]['id'],
                reference_qa_positive_child_dataset,
                reference_qa_negative_child_dataset,
                reference_qa_negative_child_dataset,
            )
            ood_positive_entries.append(positive_entry)
            ood_negative_entries.append(negative_entry)

    # The same OOD entries follow the IID records in both clarity splits.
    for split_name in ('high', 'low'):
        positive_records[split_name].extend(ood_positive_entries)
        negative_records[split_name].extend(ood_negative_entries)

    outputs = (
        ('ood_qa_positive_train_high_dataset.json', positive_records['high']),
        ('ood_qa_negative_train_high_dataset.json', negative_records['high']),
        ('ood_qa_positive_train_low_dataset.json', positive_records['low']),
        ('ood_qa_negative_train_low_dataset.json', negative_records['low']),
    )
    for file_name, records in outputs:
        with open(os.path.join(output_dir, file_name), 'w') as handle:
            json.dump(records, handle)
        

def construct_ar_dataset(origin_ood_parent_child_dataset, reference_qa_positive_child_dataset, reference_qa_negative_child_dataset, ood_train_ratio):

    reference_standard_positive_positive_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/parent_child_final/standard_positive_positive_positive_test_dataset.json'
    reference_standard_positive_negative_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/parent_child_final/standard_positive_positive_negative_test_dataset.json'
    reference_standard_negative_positive_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/parent_child_final/standard_positive_negative_positive_test_dataset.json'
    reference_standard_negative_negative_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/parent_child_final/standard_positive_negative_negative_test_dataset.json'

    reference_standard_positive_positive_positive_test_dataset = json.load(open(reference_standard_positive_positive_test_dataset_path))
    reference_standard_positive_positive_negative_test_dataset = json.load(open(reference_standard_positive_negative_test_dataset_path))
    reference_standard_positive_negative_positive_test_dataset = json.load(open(reference_standard_negative_positive_test_dataset_path))
    reference_standard_positive_negative_negative_test_dataset = json.load(open(reference_standard_negative_negative_test_dataset_path))

    target_iid_ar_positive_positive_positive_test_high_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_positive_positive_positive_test_high_dataset.json'.format(ood_train_ratio)
    target_iid_ar_positive_positive_negative_test_high_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_positive_positive_negative_test_high_dataset.json'.format(ood_train_ratio)
    target_iid_ar_positive_negative_positive_test_high_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_positive_negative_positive_test_high_dataset.json'.format(ood_train_ratio)
    target_iid_ar_positive_negative_negative_test_high_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_positive_negative_negative_test_high_dataset.json'.format(ood_train_ratio) 

    target_iid_ar_negative_positive_positive_test_high_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_negative_positive_positive_test_high_dataset.json'.format(ood_train_ratio)
    target_iid_ar_negative_positive_negative_test_high_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_negative_positive_negative_test_high_dataset.json'.format(ood_train_ratio)
    target_iid_ar_negative_negative_positive_test_high_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_negative_negative_positive_test_high_dataset.json'.format(ood_train_ratio)
    target_iid_ar_negative_negative_negative_test_high_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_negative_negative_negative_test_high_dataset.json'.format(ood_train_ratio) 
    
    target_iid_ar_positive_positive_positive_test_low_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_positive_positive_positive_test_low_dataset.json'.format(ood_train_ratio)
    target_iid_ar_positive_positive_negative_test_low_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_positive_positive_negative_test_low_dataset.json'.format(ood_train_ratio)
    target_iid_ar_positive_negative_positive_test_low_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_positive_negative_positive_test_low_dataset.json'.format(ood_train_ratio)
    target_iid_ar_positive_negative_negative_test_low_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_positive_negative_negative_test_low_dataset.json'.format(ood_train_ratio) 

    target_iid_ar_negative_positive_positive_test_low_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_negative_positive_positive_test_low_dataset.json'.format(ood_train_ratio)
    target_iid_ar_negative_positive_negative_test_low_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_negative_positive_negative_test_low_dataset.json'.format(ood_train_ratio)
    target_iid_ar_negative_negative_positive_test_low_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_negative_negative_positive_test_low_dataset.json'.format(ood_train_ratio)
    target_iid_ar_negative_negative_negative_test_low_clarity_entity_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/iid_ar_negative_negative_negative_test_low_dataset.json'.format(ood_train_ratio) 

    target_ood_ar_positive_positive_positive_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_ar_positive_positive_positive_test_dataset.json'.format(ood_train_ratio)
    target_ood_ar_positive_positive_negative_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_ar_positive_positive_negative_test_dataset.json'.format(ood_train_ratio)
    target_ood_ar_positive_negative_positive_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_ar_positive_negative_positive_test_dataset.json'.format(ood_train_ratio)
    target_ood_ar_positive_negative_negative_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_ar_positive_negative_negative_test_dataset.json'.format(ood_train_ratio) 

    target_ood_ar_negative_positive_positive_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_ar_negative_positive_positive_test_dataset.json'.format(ood_train_ratio)
    target_ood_ar_negative_positive_negative_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_ar_negative_positive_negative_test_dataset.json'.format(ood_train_ratio)
    target_ood_ar_negative_negative_positive_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_ar_negative_negative_positive_test_dataset.json'.format(ood_train_ratio)
    target_ood_ar_negative_negative_negative_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_ar_negative_negative_negative_test_dataset.json'.format(ood_train_ratio) 

    target_ood_positive_positive_positive_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_positive_positive_positive_test_dataset.json'.format(ood_train_ratio)
    target_ood_positive_positive_negative_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_positive_positive_negative_test_dataset.json'.format(ood_train_ratio)
    target_ood_positive_negative_positive_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_positive_negative_positive_test_dataset.json'.format(ood_train_ratio)
    target_ood_positive_negative_negative_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_positive_negative_negative_test_dataset.json'.format(ood_train_ratio) 

    target_ood_negative_positive_positive_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_negative_positive_positive_test_dataset.json'.format(ood_train_ratio)
    target_ood_negative_positive_negative_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_negative_positive_negative_test_dataset.json'.format(ood_train_ratio)
    target_ood_negative_negative_positive_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_negative_negative_positive_test_dataset.json'.format(ood_train_ratio)
    target_ood_negative_negative_negative_test_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_negative_negative_negative_test_dataset.json'.format(ood_train_ratio) 

    target_ood_ar_train_high_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_ar_train_high_dataset_path.json'.format(ood_train_ratio)
    target_ood_ar_train_low_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}/ood_ar_train_low_dataset_path.json'.format(ood_train_ratio)

    iid_ar_positive_positive_positive_test_high_dataset, iid_ar_positive_positive_negative_test_high_dataset, iid_ar_positive_negative_positive_test_high_dataset, iid_ar_positive_negative_negative_test_high_dataset = [], [], [], []
    iid_ar_negative_positive_positive_test_high_dataset, iid_ar_negative_positive_negative_test_high_dataset, iid_ar_negative_negative_positive_test_high_dataset, iid_ar_negative_negative_negative_test_high_dataset = [], [], [], []
    iid_ar_positive_positive_positive_test_low_dataset, iid_ar_positive_positive_negative_test_low_dataset, iid_ar_positive_negative_positive_test_low_dataset, iid_ar_positive_negative_negative_test_low_dataset = [], [], [], []
    iid_ar_negative_positive_positive_test_low_dataset, iid_ar_negative_positive_negative_test_low_dataset, iid_ar_negative_negative_positive_test_low_dataset, iid_ar_negative_negative_negative_test_low_dataset = [], [], [], []
    ood_positive_positive_positive_test_dataset, ood_positive_positive_negative_test_dataset, ood_positive_negative_positive_test_dataset, ood_positive_negative_negative_test_dataset = [], [], [], []
    ood_negative_positive_positive_test_dataset, ood_negative_positive_negative_test_dataset, ood_negative_negative_positive_test_dataset, ood_negative_negative_negative_test_dataset = [], [], [], []
    ood_ar_positive_positive_positive_test_dataset, ood_ar_positive_positive_negative_test_dataset, ood_ar_positive_negative_positive_test_dataset, ood_ar_positive_negative_negative_test_dataset = [], [], [], []
    ood_ar_negative_positive_positive_test_dataset, ood_ar_negative_positive_negative_test_dataset, ood_ar_negative_negative_positive_test_dataset, ood_ar_negative_negative_negative_test_dataset = [], [], [], []
    
    ood_ar_train_high_entity_clarity_dataset = []
    ood_ar_train_low_entity_clarity_dataset = []

    ood_train_high_entity_clarity = origin_ood_parent_child_dataset['origin_train_high_entity_clarity']
    ood_train_low_entity_clarity = origin_ood_parent_child_dataset['origin_train_low_entity_clarity']
    ood_test_middle_entity_clarity = origin_ood_parent_child_dataset['origin_test_middle_entity_clarity']

    origin_index = list(range(len(ood_train_high_entity_clarity)))
    # 一开始就对全局的数据进行切分两半，这里修订一下，把反向推理的样本比例提升到75%
    # 这里的随机种子是可以调的，说不定能调到一个比较好的切分方式
    random.seed(33)
    random.shuffle(origin_index)

    if len(origin_index) % 2 == 0:
        pair_positive_index, pair_negative_index = origin_index[:len(origin_index) // 2], origin_index[len(origin_index) // 2:]
    else:
        pair_positive_index, pair_negative_index = origin_index[:len(origin_index) // 2 + 1], origin_index[len(origin_index) // 2 + 1:]

    if len(origin_index) % 2 == 0:
        assert len(pair_positive_index) == len(pair_negative_index)
    else:
        assert len(pair_positive_index) == len(pair_negative_index) + 1
    

    # pair_positive_index, pair_negative_index = origin_index[: math.ceil(len(origin_index) * 1/4)], origin_index[math.ceil(len(origin_index) * 1/4): ]
    

    # high entity clarity和low entity clarity的训练样本，同时做它的正样和负样本训练数据，以及对应的正样本和负样本测试数据
    for i in range(len(pair_positive_index)):        
        # 首先加入high entity clarity中各自的训练样本
        cur_pos_sample = ood_train_high_entity_clarity[pair_positive_index[i]]
        child, parent, parent_type, id = cur_pos_sample['child'], cur_pos_sample['parent'], cur_pos_sample['parent_type'], cur_pos_sample['id']

        another_parent_list = child_to_parent_dict[child].copy()
        another_parent_list.remove(parent)
        another_parent = another_parent_list if len(another_parent_list) != 0 else None
        another_child_list = parent_to_child_dict[parent].copy()
        another_child_list.remove(child)
        another_child = another_child_list if len(another_child_list) != 0 else None

        copy_pos_sample = copy.deepcopy(reference_qa_positive_child_dataset[id])
        copy_pos_sample['field'] = 'iid'
        ood_ar_train_high_entity_clarity_dataset.append(copy_pos_sample)

        copy_positive_index = copy.deepcopy(pair_positive_index)
        copy_positive_index.pop(i)
        random.seed(i)
        demonstration_index = random.sample(copy_positive_index, 5)

        # 开始制作对应的test样本集合
        test_prompt = system
        for j in range(5):
            test_demonstration = ood_train_high_entity_clarity[demonstration_index[j]]
            # 正序 + 正关
            example = "Q: Who is {}'s {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent_type'], test_demonstration['parent'])
            test_prompt += example

        test_prompt += "Q: Who is {}'s {}?\nA:".format(child, parent_type)
        test_completion = " {}".format(parent)
        iid_ar_positive_positive_positive_test_high_dataset.append({'prompt':test_prompt, 'completion':test_completion})

        test_prompt = system
        for j in range(5):
            test_demonstration = ood_train_high_entity_clarity[demonstration_index[j]]
            # 正序 + 逆关
            example = "Q: Whose child is {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent'])
            test_prompt += example

        test_prompt += "Q: Whose child is {}?\nA:".format(child)
        test_completion_list = [" {}".format(parent), ] if another_parent is None else [" {}".format(sample_another_parent) for sample_another_parent in another_parent] + [" {}".format(parent)]
        test_completion = ','.join(test_completion_list)
        iid_ar_positive_positive_negative_test_high_dataset.append({'prompt':test_prompt, 'completion':test_completion})

        test_prompt = system
        for j in range(5):
            test_demonstration = ood_train_high_entity_clarity[demonstration_index[j]]
            # 逆序 + 正关
            example = "Q: Whose {} is {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['parent'], test_demonstration['child'])
            test_prompt += example

        test_prompt += "Q: Whose {} is {}?\nA:".format(parent_type, parent)
        test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
        test_completion = ','.join(test_completion_list)
        iid_ar_positive_negative_positive_test_high_dataset.append({'prompt':test_prompt, 'completion':test_completion})


        test_prompt = system
        for j in range(5):
            test_demonstration = ood_train_high_entity_clarity[demonstration_index[j]]
            # 逆序 + 逆关
            example = "Q: Who is {}'s child?\nA: {}\n".format(test_demonstration['parent'], test_demonstration['child'])
            test_prompt += example

        test_prompt += "Q: Who is {}'s child?\nA:".format(parent)
        test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
        test_completion = ','.join(test_completion_list)
        iid_ar_positive_negative_negative_test_high_dataset.append({'prompt':test_prompt, 'completion':test_completion})


        if i < len(pair_negative_index) and i < len(pair_positive_index) - 1:
            
            cur_neg_sample = ood_train_high_entity_clarity[pair_negative_index[i]]
            child, parent, parent_type, id = cur_neg_sample['child'], cur_neg_sample['parent'], cur_neg_sample['parent_type'], cur_neg_sample['id']

            another_parent_list = child_to_parent_dict[child].copy()
            another_parent_list.remove(parent)
            another_parent = another_parent_list if len(another_parent_list) != 0 else None
            another_child_list = parent_to_child_dict[parent].copy()
            another_child_list.remove(child)
            another_child = another_child_list if len(another_child_list) != 0 else None

            copy_neg_sample = copy.deepcopy(reference_qa_negative_child_dataset[id])
            copy_neg_sample['field'] = 'iid'
            ood_ar_train_high_entity_clarity_dataset.append(copy_neg_sample)
        
            # 同样是要加入对应的测试样本
            copy_negative_index = copy.deepcopy(pair_negative_index)
            copy_negative_index.pop(i)
            random.seed(i)
            demonstration_index = random.sample(copy_negative_index, 5)

            # 开始制作对应的test样本集合
            test_prompt = system
            for j in range(5):
                test_demonstration = ood_train_high_entity_clarity[demonstration_index[j]]
                example = "Q: Who is {}'s child?\nA: {}\n".format(test_demonstration['parent'], test_demonstration['child'])
                test_prompt += example

            test_prompt += "Q: Who is {}'s child?\nA:".format(parent)
            # 针对一个父亲/母亲有多个孩子的情况
            test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
            test_completion = ','.join(test_completion_list)
            iid_ar_negative_positive_positive_test_high_dataset.append({'prompt':test_prompt, 'completion':test_completion})

            test_prompt = system
            for j in range(5):
                test_demonstration = ood_train_high_entity_clarity[demonstration_index[j]]
                example = "Q: Whose {} is {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['parent'], test_demonstration['child'])
                test_prompt += example

            test_prompt += "Q: Whose {} is {}?\nA:".format(parent_type, parent)
            test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
            test_completion = ','.join(test_completion_list)
            # test_completion = " {}".format(child)
            iid_ar_negative_positive_negative_test_high_dataset.append({'prompt':test_prompt, 'completion':test_completion})

            test_prompt = system
            for j in range(5):
                test_demonstration = ood_train_high_entity_clarity[demonstration_index[j]]
                example = "Q: Whose child is {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent'])
                test_prompt += example

            test_prompt += "Q: Whose child is {}?\nA:".format(child)
            # 针对一个孩子有多个父母
            test_completion_list = [" {}".format(parent), ] if another_parent is None else [" {}".format(sample_another_parent) for sample_another_parent in another_parent] + [" {}".format(parent)]
            test_completion = ','.join(test_completion_list)
            # test_completion = " {}".format(parent)
            iid_ar_negative_negative_positive_test_high_dataset.append({'prompt':test_prompt, 'completion':test_completion})

            test_prompt = system
            for j in range(5):
                test_demonstration = ood_train_high_entity_clarity[demonstration_index[j]]
                example = "Q: Who is {}'s {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent_type'], test_demonstration['parent'])
                test_prompt += example

            test_prompt += "Q: Who is {}'s {}?\nA:".format(child, parent_type)
            test_completion = " {}".format(parent)
            iid_ar_negative_negative_negative_test_high_dataset.append({'prompt':test_prompt, 'completion':test_completion})


            """这里新加了一段来支撑调整比例"""
        elif i < len(pair_negative_index) and i == len(pair_positive_index) - 1:
                k = i
                while k < len(pair_negative_index):
                    cur_neg_sample = ood_train_high_entity_clarity[pair_negative_index[k]]
                    child, parent, parent_type, id = cur_neg_sample['child'], cur_neg_sample['parent'], cur_neg_sample['parent_type'], cur_neg_sample['id']

                    another_parent_list = child_to_parent_dict[child].copy()
                    another_parent_list.remove(parent)   
                    another_parent = another_parent_list if len(another_parent_list) != 0 else None
                    another_child_list = parent_to_child_dict[parent].copy()
                    another_child_list.remove(child)
                    another_child = another_child_list if len(another_child_list) != 0 else None

                    copy_neg_sample = copy.deepcopy(reference_qa_negative_child_dataset[id])
                    copy_neg_sample['field'] = 'iid'
                    ood_ar_train_high_entity_clarity_dataset.append(copy_neg_sample)
                
                    # 同样是要加入对应的测试样本
                    copy_negative_index = copy.deepcopy(pair_negative_index)
                    copy_negative_index.pop(k)
                    random.seed(k)
                    demonstration_index = random.sample(copy_negative_index, 5)

                    # 开始制作对应的test样本集合
                    test_prompt = system
                    for j in range(5):
                        test_demonstration = ood_train_high_entity_clarity[demonstration_index[j]]
                        example = "Q: Who is {}'s child?\nA: {}\n".format(test_demonstration['parent'], test_demonstration['child'])
                        test_prompt += example

                    test_prompt += "Q: Who is {}'s child?\nA:".format(parent)
                    # 针对一个父亲/母亲有多个孩子的情况
                    test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
                    test_completion = ','.join(test_completion_list)
                    iid_ar_negative_positive_positive_test_high_dataset.append({'prompt':test_prompt, 'completion':test_completion})

                    test_prompt = system
                    for j in range(5):
                        test_demonstration = ood_train_high_entity_clarity[demonstration_index[j]]
                        example = "Q: Whose {} is {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['parent'], test_demonstration['child'])
                        test_prompt += example

                    test_prompt += "Q: Whose {} is {}?\nA:".format(parent_type, parent)
                    test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
                    test_completion = ','.join(test_completion_list)
                    # test_completion = " {}".format(child)
                    iid_ar_negative_positive_negative_test_high_dataset.append({'prompt':test_prompt, 'completion':test_completion})

                    test_prompt = system
                    for j in range(5):
                        test_demonstration = ood_train_high_entity_clarity[demonstration_index[j]]
                        example = "Q: Whose child is {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent'])
                        test_prompt += example

                    test_prompt += "Q: Whose child is {}?\nA:".format(child)
                    # 针对一个孩子有多个父母
                    test_completion_list = [" {}".format(parent), ] if another_parent is None else [" {}".format(sample_another_parent) for sample_another_parent in another_parent] + [" {}".format(parent)]
                    test_completion = ','.join(test_completion_list)
                    # test_completion = " {}".format(parent)
                    iid_ar_negative_negative_positive_test_high_dataset.append({'prompt':test_prompt, 'completion':test_completion})

                    test_prompt = system
                    for j in range(5):
                        test_demonstration = ood_train_high_entity_clarity[demonstration_index[j]]
                        example = "Q: Who is {}'s {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent_type'], test_demonstration['parent'])
                        test_prompt += example

                    test_prompt += "Q: Who is {}'s {}?\nA:".format(child, parent_type)
                    test_completion = " {}".format(parent)
                    iid_ar_negative_negative_negative_test_high_dataset.append({'prompt':test_prompt, 'completion':test_completion})
                    k = k + 1

        # 然后加入low entity clarity中各自的训练样本
        cur_pos_sample = ood_train_low_entity_clarity[pair_positive_index[i]]
        child, parent, parent_type, id = cur_pos_sample['child'], cur_pos_sample['parent'], cur_pos_sample['parent_type'], cur_pos_sample['id']

        another_parent_list = child_to_parent_dict[child].copy()
        another_parent_list.remove(parent)
        another_parent = another_parent_list if len(another_parent_list) != 0 else None
        another_child_list = parent_to_child_dict[parent].copy()
        another_child_list.remove(child)
        another_child = another_child_list if len(another_child_list) != 0 else None

        copy_pos_sample = copy.deepcopy(reference_qa_positive_child_dataset[id])
        copy_pos_sample['field'] = 'iid'
        ood_ar_train_low_entity_clarity_dataset.append(copy_pos_sample)
        # 这里和high保持一致就可以，因为high和low本质上是一样的样本
        copy_positive_index = copy.deepcopy(pair_positive_index)
        copy_positive_index.pop(i)
        random.seed(i)
        demonstration_index = random.sample(copy_positive_index, 5)

        # 开始制作对应的test样本集合
        test_prompt = system
        for j in range(5):
            test_demonstration = ood_train_low_entity_clarity[demonstration_index[j]]
            # 正序 + 正关
            example = "Q: Who is {}'s {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent_type'], test_demonstration['parent'])
            test_prompt += example

        test_prompt += "Q: Who is {}'s {}?\nA:".format(child, parent_type)
        test_completion = " {}".format(parent)
        iid_ar_positive_positive_positive_test_low_dataset.append({'prompt':test_prompt, 'completion':test_completion})

        # Forward order + inverse relation ("Whose child is X?"): five few-shot
        # demonstrations followed by the query for the current child.
        test_prompt = system
        # Use the literal shot count here: `range(j)` reused the previous loop's
        # final index (4) and silently dropped the fifth demonstration.
        for j in range(5):
            test_demonstration = ood_train_low_entity_clarity[demonstration_index[j]]
            example = "Q: Whose child is {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent'])
            test_prompt += example

        test_prompt += "Q: Whose child is {}?\nA:".format(child)
        # A child may have several known parents: list the other ones first, then this one.
        test_completion_list = [" {}".format(parent), ] if another_parent is None else [" {}".format(sample_another_parent) for sample_another_parent in another_parent] + [" {}".format(parent)]
        test_completion = ','.join(test_completion_list)
        iid_ar_positive_positive_negative_test_low_dataset.append({'prompt':test_prompt, 'completion':test_completion})


        test_prompt = system
        for j in range(5):
            test_demonstration = ood_train_low_entity_clarity[demonstration_index[j]]
            # 逆序 + 正关
            example = "Q: Whose {} is {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['parent'], test_demonstration['child'])
            test_prompt += example

        test_prompt += "Q: Whose {} is {}?\nA:".format(parent_type, parent)
        test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
        test_completion = ','.join(test_completion_list)
        iid_ar_positive_negative_positive_test_low_dataset.append({'prompt':test_prompt, 'completion':test_completion})


        test_prompt = system
        for j in range(5):
            test_demonstration = ood_train_low_entity_clarity[demonstration_index[j]]
            # 逆序 + 逆关
            example = "Q: Who is {}'s child?\nA: {}\n".format(test_demonstration['parent'], test_demonstration['child'])
            test_prompt += example

        test_prompt += "Q: Who is {}'s child?\nA:".format(parent)
        test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
        test_completion = ','.join(test_completion_list)
        iid_ar_positive_negative_negative_test_low_dataset.append({'prompt':test_prompt, 'completion':test_completion})


        if i < len(pair_negative_index) and i < len(pair_positive_index) - 1:
            
            cur_neg_sample = ood_train_low_entity_clarity[pair_negative_index[i]]
            child, parent, parent_type, id = cur_neg_sample['child'], cur_neg_sample['parent'], cur_neg_sample['parent_type'], cur_neg_sample['id']

            another_parent_list = child_to_parent_dict[child].copy()
            another_parent_list.remove(parent)
            another_parent = another_parent_list if len(another_parent_list) != 0 else None
            another_child_list = parent_to_child_dict[parent].copy()
            another_child_list.remove(child)
            another_child = another_child_list if len(another_child_list) != 0 else None

            copy_neg_sample = copy.deepcopy(reference_qa_negative_child_dataset[id])
            copy_neg_sample['field'] = 'iid'
            ood_ar_train_low_entity_clarity_dataset.append(copy_neg_sample)
        
            # 同样是要加入对应的测试样本
            copy_negative_index = copy.deepcopy(pair_negative_index)
            copy_negative_index.pop(i)
            random.seed(i)
            demonstration_index = random.sample(copy_negative_index, 5)

            # 开始制作对应的test样本集合
            test_prompt = system
            for j in range(5):
                test_demonstration = ood_train_low_entity_clarity[demonstration_index[j]]
                example = "Q: Who is {}'s child?\nA: {}\n".format(test_demonstration['parent'], test_demonstration['child'])
                test_prompt += example

            test_prompt += "Q: Who is {}'s child?\nA:".format(parent)
            # 针对一个父亲/母亲有多个孩子的情况
            test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
            test_completion = ','.join(test_completion_list)
            # test_completion = " {}".format(child)
            iid_ar_negative_positive_positive_test_low_dataset.append({'prompt':test_prompt, 'completion':test_completion})

            test_prompt = system
            for j in range(5):
                test_demonstration = ood_train_low_entity_clarity[demonstration_index[j]]
                example = "Q: Whose {} is {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['parent'], test_demonstration['child'])
                test_prompt += example

            test_prompt += "Q: Whose {} is {}?\nA:".format(parent_type, parent)
            test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
            test_completion = ','.join(test_completion_list)
            # test_completion = " {}".format(child)
            iid_ar_negative_positive_negative_test_low_dataset.append({'prompt':test_prompt, 'completion':test_completion})

            test_prompt = system
            for j in range(5):
                test_demonstration = ood_train_low_entity_clarity[demonstration_index[j]]
                example = "Q: Whose child is {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent'])
                test_prompt += example

            test_prompt += "Q: Whose child is {}?\nA:".format(child)
            # 针对一个孩子有多个父母
            test_completion_list = [" {}".format(parent), ] if another_parent is None else [" {}".format(sample_another_parent) for sample_another_parent in another_parent] + [" {}".format(parent)]
            test_completion = ','.join(test_completion_list)
            # test_completion = " {}".format(parent)
            iid_ar_negative_negative_positive_test_low_dataset.append({'prompt':test_prompt, 'completion':test_completion})

            test_prompt = system
            for j in range(5):
                test_demonstration = ood_train_low_entity_clarity[demonstration_index[j]]
                example = "Q: Who is {}'s {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent_type'], test_demonstration['parent'])
                test_prompt += example

            test_prompt += "Q: Who is {}'s {}?\nA:".format(child, parent_type)
            test_completion = " {}".format(parent)
            iid_ar_negative_negative_negative_test_low_dataset.append({'prompt':test_prompt, 'completion':test_completion})

        elif i < len(pair_negative_index) and i == len(pair_positive_index) - 1:
            k = i
            while k < len(pair_negative_index):
                cur_neg_sample = ood_train_low_entity_clarity[pair_negative_index[k]]
                child, parent, parent_type, id = cur_neg_sample['child'], cur_neg_sample['parent'], cur_neg_sample['parent_type'], cur_neg_sample['id']

                another_parent_list = child_to_parent_dict[child].copy()
                another_parent_list.remove(parent)
                another_parent = another_parent_list if len(another_parent_list) != 0 else None
                another_child_list = parent_to_child_dict[parent].copy()
                another_child_list.remove(child)
                another_child = another_child_list if len(another_child_list) != 0 else None

                copy_neg_sample = copy.deepcopy(reference_qa_negative_child_dataset[id])
                copy_neg_sample['field'] = 'iid'
                ood_ar_train_low_entity_clarity_dataset.append(copy_neg_sample)
            
                # 同样是要加入对应的测试样本
                # Candidate demonstrations: every negative index except the current sample's.
                copy_negative_index = copy.deepcopy(pair_negative_index)
                copy_negative_index.pop(k)
                # Seed with the sample's own position k (not the fixed outer i) so each
                # tail sample gets its own draw, matching the high-entity-clarity branch.
                random.seed(k)
                demonstration_index = random.sample(copy_negative_index, 5)

                # 开始制作对应的test样本集合
                test_prompt = system
                for j in range(5):
                    test_demonstration = ood_train_low_entity_clarity[demonstration_index[j]]
                    example = "Q: Who is {}'s child?\nA: {}\n".format(test_demonstration['parent'], test_demonstration['child'])
                    test_prompt += example

                test_prompt += "Q: Who is {}'s child?\nA:".format(parent)
                # 针对一个父亲/母亲有多个孩子的情况
                test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
                test_completion = ','.join(test_completion_list)
                # test_completion = " {}".format(child)
                iid_ar_negative_positive_positive_test_low_dataset.append({'prompt':test_prompt, 'completion':test_completion})

                test_prompt = system
                for j in range(5):
                    test_demonstration = ood_train_low_entity_clarity[demonstration_index[j]]
                    example = "Q: Whose {} is {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['parent'], test_demonstration['child'])
                    test_prompt += example

                test_prompt += "Q: Whose {} is {}?\nA:".format(parent_type, parent)
                test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
                test_completion = ','.join(test_completion_list)
                # test_completion = " {}".format(child)
                iid_ar_negative_positive_negative_test_low_dataset.append({'prompt':test_prompt, 'completion':test_completion})

                test_prompt = system
                for j in range(5):
                    test_demonstration = ood_train_low_entity_clarity[demonstration_index[j]]
                    example = "Q: Whose child is {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent'])
                    test_prompt += example

                test_prompt += "Q: Whose child is {}?\nA:".format(child)
                # 针对一个孩子有多个父母
                test_completion_list = [" {}".format(parent), ] if another_parent is None else [" {}".format(sample_another_parent) for sample_another_parent in another_parent] + [" {}".format(parent)]
                test_completion = ','.join(test_completion_list)
                # test_completion = " {}".format(parent)
                iid_ar_negative_negative_positive_test_low_dataset.append({'prompt':test_prompt, 'completion':test_completion})

                test_prompt = system
                for j in range(5):
                    test_demonstration = ood_train_low_entity_clarity[demonstration_index[j]]
                    example = "Q: Who is {}'s {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent_type'], test_demonstration['parent'])
                    test_prompt += example

                test_prompt += "Q: Who is {}'s {}?\nA:".format(child, parent_type)
                test_completion = " {}".format(parent)
                iid_ar_negative_negative_negative_test_low_dataset.append({'prompt':test_prompt, 'completion':test_completion})
                k = k + 1

    # 加入high entity clarity和low entity clarity对应的测试样本，同时做它的正样和负样本训练数据，以及对应的正样本和负样本测试数据，并且测试数据只用test_middle里面的对应样本
    # Shuffle the middle-entity-clarity sample indices deterministically.
    origin_index = list(range(len(ood_test_middle_entity_clarity)))
    random.seed(13)
    random.shuffle(origin_index)
    # Split into positive / negative halves. Cutting at ceil(n / 2) gives equal halves
    # for even n and one extra positive for odd n, which is what the asserts below
    # require (cutting at n // 2 + 1 produced a m+1 / m-1 split for even n).
    pair_positive_index, pair_negative_index = origin_index[:(len(origin_index) + 1) // 2], origin_index[(len(origin_index) + 1) // 2:]

    if len(origin_index) % 2 == 0:
        assert len(pair_positive_index) == len(pair_negative_index)
    else:
        assert len(pair_positive_index) == len(pair_negative_index) + 1

    # high entity clarity和low entity clarity的训练样本，同时做它的正样和负样本训练数据，以及对应的正样本和负样本测试数据
    for i in range(len(pair_positive_index)):
        
        # 首先加入high entity clarity中各自的训练样本
        cur_pos_sample = ood_test_middle_entity_clarity[pair_positive_index[i]]
        child, parent, parent_type, id = cur_pos_sample['child'], cur_pos_sample['parent'], cur_pos_sample['parent_type'], cur_pos_sample['id']

        another_parent_list = child_to_parent_dict[child].copy()
        another_parent_list.remove(parent)
        another_parent = another_parent_list if len(another_parent_list) != 0 else None
        another_child_list = parent_to_child_dict[parent].copy()
        another_child_list.remove(child)
        another_child = another_child_list if len(another_child_list) != 0 else None

        # 对应的是正向样本，analogy reasoning和全部都是positive的正向训练应该样本是一模一样的，这里和上面是不同的，上面分成了positive和negtive两个部分来存储，但是这里全部统一了
        copy_pos_sample = {'origin_pos_prompt': copy.deepcopy(reference_qa_positive_child_dataset[id]['origin_prompt'])}
        copy_pos_sample['origin_neg_prompt'] =  copy.deepcopy(reference_qa_negative_child_dataset[id]['origin_prompt'])
        copy_pos_sample['ar_origin_prompt'] = copy.deepcopy(reference_qa_positive_child_dataset[id]['origin_prompt'])
        # 标记这个训练样本是ood
        copy_pos_sample['field'] = 'ood'

        # 这里对high和low是要同时加的，都是对应的测试的正向样本
        ood_ar_train_high_entity_clarity_dataset.append(copy_pos_sample)
        ood_ar_train_low_entity_clarity_dataset.append(copy_pos_sample)

        copy_positive_index = copy.deepcopy(pair_positive_index)
        copy_positive_index.pop(i)
        # 33
        random.seed(i + 1208)
        demonstration_index = random.sample(copy_positive_index, 5)

        # 开始制作对应的test样本集合
        # 对于analogy reasoning来说，它的正向样本和全量的正向样本是一致的，但是要加入对应的负向样本
        test_prompt = system
        for j in range(5):
            test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
            # 正序 + 正关
            example = "Q: Who is {}'s {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent_type'], test_demonstration['parent'])
            test_prompt += example

        test_prompt += "Q: Who is {}'s {}?\nA:".format(child, parent_type)
        test_completion = " {}".format(parent)

        ood_positive_positive_positive_test_dataset.append({'prompt': test_prompt, 'completion': test_completion})
        ood_ar_positive_positive_positive_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})

        """这里做一下变化，用对齐进行clarity分类的那一部分测试集来进行评测
        ood_positive_positive_positive_test_dataset.append({'prompt': reference_standard_positive_positive_positive_test_dataset[id]['prompt'],
                                                               'completion': reference_standard_positive_positive_positive_test_dataset[id]['completion']})
        ood_ar_positive_positive_positive_test_dataset.append({'prompt': reference_standard_positive_positive_positive_test_dataset[id]['prompt'],
                                                               'completion': reference_standard_positive_positive_positive_test_dataset[id]['completion']})
        """

        # 加入对应的负向样本的测试
        test_prompt = system
        for j in range(5):
            test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
            example = "Q: Who is {}'s child?\nA: {}\n".format(test_demonstration['parent'], test_demonstration['child'])
            test_prompt += example

        test_prompt += "Q: Who is {}'s child?\nA:".format(parent)
        # 针对一个父亲/母亲有多个孩子的情况
        test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
        test_completion = ','.join(test_completion_list)
        # test_completion = " {}".format(child)
        ood_negative_positive_positive_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})


        copy_positive_index = copy.deepcopy(pair_positive_index)
        copy_positive_index.pop(i)
        # 33
        random.seed(i + 1208)
        demonstration_index = random.sample(copy_positive_index, 5)

        # 对于analogy reasoning来说，它的正向样本和全量的正向样本是一致的，但是要加入对应的负向样本
        test_prompt = system
        for j in range(5):
            test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
            # 正序 + 逆关
            example = "Q: Whose child is {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent'])
            test_prompt += example

        test_prompt += "Q: Whose child is {}?\nA:".format(child)
        test_completion_list = [" {}".format(parent), ] if another_parent is None else [" {}".format(sample_another_parent) for sample_another_parent in another_parent] + [" {}".format(parent)]
        test_completion = ','.join(test_completion_list)
       
        ood_positive_positive_negative_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})
        ood_ar_positive_positive_negative_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})

        """这里做一下变化，用对齐进行clarity分类的那一部分测试集来进行评测

        ood_positive_positive_negative_test_dataset.append({'prompt': reference_standard_positive_positive_negative_test_dataset[id]['prompt'],
                                                               'completion': reference_standard_positive_positive_negative_test_dataset[id]['completion']}) 
        ood_ar_positive_positive_negative_test_dataset.append({'prompt': reference_standard_positive_positive_negative_test_dataset[id]['prompt'],
                                                               'completion': reference_standard_positive_positive_negative_test_dataset[id]['completion']})
        """

        # 加入对应的负向样本的测试
        test_prompt = system
        for j in range(5):
            test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
            example = "Q: Whose {} is {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['parent'], test_demonstration['child'])
            test_prompt += example

        test_prompt += "Q: Whose {} is {}?\nA:".format(parent_type, parent)
        test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
        test_completion = ','.join(test_completion_list)
        # test_completion = " {}".format(child)
        ood_negative_positive_negative_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})


        copy_positive_index = copy.deepcopy(pair_positive_index)
        copy_positive_index.pop(i)
        # 1020
        random.seed(i + 1049)
        demonstration_index = random.sample(copy_positive_index, 5)

        # 对于analogy reasoning来说，它的正向样本和全量的正向样本是一致的，但是要加入对应的负向样本
        test_prompt = system
        for j in range(5):
            test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
            # 逆序 + 正关
            example = "Q: Whose {} is {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['parent'], test_demonstration['child'])
            test_prompt += example

        test_prompt += "Q: Whose {} is {}?\nA:".format(parent_type, parent)
        test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
        test_completion = ','.join(test_completion_list)

        ood_positive_negative_positive_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})
        ood_ar_positive_negative_positive_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})
        
        """这里做一下变化,用对齐进行clarity分类的那一部分测试集来进行评测
        ood_positive_negative_positive_test_dataset.append({'prompt': reference_standard_positive_negative_positive_test_dataset[id]['prompt'],
                                                               'completion': reference_standard_positive_negative_positive_test_dataset[id]['completion']})
        ood_ar_positive_negative_positive_test_dataset.append({'prompt': reference_standard_positive_negative_positive_test_dataset[id]['prompt'],
                                                               'completion': reference_standard_positive_negative_positive_test_dataset[id]['completion']})
        """
                                                               
        # 加入对应的负向样本的测试
        test_prompt = system
        for j in range(5):
            test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
            example = "Q: Whose child is {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent'])
            test_prompt += example

        test_prompt += "Q: Whose child is {}?\nA:".format(child)
        # 针对一个孩子有多个父母
        test_completion_list = [" {}".format(parent), ] if another_parent is None else [" {}".format(sample_another_parent) for sample_another_parent in another_parent] + [" {}".format(parent)]
        test_completion = ','.join(test_completion_list)
        # test_completion = " {}".format(parent)
        ood_negative_negative_positive_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})


        copy_positive_index = copy.deepcopy(pair_positive_index)
        copy_positive_index.pop(i)
        # 1033
        random.seed(i + 1033)
        demonstration_index = random.sample(copy_positive_index, 5)

         # 对于analogy reasoning来说，它的正向样本和全量的正向样本是一致的，但是要加入对应的负向样本
        test_prompt = system
        for j in range(5):
            test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
            # 逆序 + 逆关
            example = "Q: Who is {}'s child?\nA: {}\n".format(test_demonstration['parent'], test_demonstration['child'])
            test_prompt += example

        test_prompt += "Q: Who is {}'s child?\nA:".format(parent)
        test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
        test_completion = ','.join(test_completion_list)
        
        ood_positive_negative_negative_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})
        ood_ar_positive_negative_negative_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})
        
        """这里做一下对齐
        ood_positive_negative_negative_test_dataset.append({'prompt': reference_standard_positive_negative_negative_test_dataset[id]['prompt'],
                                                               'completion': reference_standard_positive_negative_negative_test_dataset[id]['completion']})
        ood_ar_positive_negative_negative_test_dataset.append({'prompt': reference_standard_positive_negative_negative_test_dataset[id]['prompt'],
                                                               'completion': reference_standard_positive_negative_negative_test_dataset[id]['completion']})
        """

        # 加入对应的负向样本的测试
        test_prompt = system
        for j in range(5):
            test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
            example = "Q: Who is {}'s {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent_type'], test_demonstration['parent'])
            test_prompt += example

        test_prompt += "Q: Who is {}'s {}?\nA:".format(child, parent_type)
        test_completion = " {}".format(parent)
        ood_negative_negative_negative_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})


        # 这里开始同步加negative部分，这里的处理就开始和原本的样本不一致了
        if i < len(pair_negative_index):
            cur_neg_sample = ood_test_middle_entity_clarity[pair_negative_index[i]]
            child, parent, parent_type, id = cur_neg_sample['child'], cur_neg_sample['parent'], cur_neg_sample['parent_type'], cur_neg_sample['id']

            another_parent_list = child_to_parent_dict[child].copy()
            another_parent_list.remove(parent)
            another_parent = another_parent_list if len(another_parent_list) != 0 else None
            another_child_list = parent_to_child_dict[parent].copy()
            another_child_list.remove(child)
            another_child = another_child_list if len(another_child_list) != 0 else None

            copy_neg_sample = {'ar_origin_prompt': copy.deepcopy(reference_qa_negative_child_dataset[id]['origin_prompt'])}
            copy_neg_sample['origin_neg_prompt'] = copy.deepcopy(reference_qa_negative_child_dataset[id]['origin_prompt'])
            copy_neg_sample['origin_pos_prompt'] = copy.deepcopy(reference_qa_positive_child_dataset[id]['origin_prompt'])

            # 标记这个训练样本是ood
            copy_neg_sample['field'] = 'ood'
            ood_ar_train_high_entity_clarity_dataset.append(copy_neg_sample)
            ood_ar_train_low_entity_clarity_dataset.append(copy_neg_sample)
        
            # 同样是要加入对应的测试样本，但是要随着自己的不同发生相应的变化
            copy_negative_index = copy.deepcopy(pair_negative_index)
            copy_negative_index.pop(i)
            random.seed(i)
            demonstration_index = random.sample(copy_negative_index, 5)

            # 开始制作对应的test样本集合

            # 首先是制作analogy reasoning的
            test_prompt = system
            for j in range(5):
                test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
                example = "Q: Who is {}'s child?\nA: {}\n".format(test_demonstration['parent'], test_demonstration['child'])
                test_prompt += example

            test_prompt += "Q: Who is {}'s child?\nA:".format(parent)
            # 针对一个父亲/母亲有多个孩子的情况
            test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
            test_completion = ','.join(test_completion_list)
            # test_completion = " {}".format(child)
            ood_negative_positive_positive_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})
            ood_ar_negative_positive_positive_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})

            # 然后是制作原本样本的
            test_prompt = system
            for j in range(5):
                test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
                example = "Q: Who is {}'s {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent_type'], test_demonstration['parent'])
                test_prompt += example
            
            test_prompt += "Q: Who is {}'s {}?\nA:".format(child, parent_type)
            test_completion = " {}".format(parent)

            ood_positive_positive_positive_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})
            
            """这里做一下对齐和原样本保持一致
            ood_positive_positive_positive_test_dataset.append({'prompt': reference_standard_positive_positive_positive_test_dataset[id]['prompt'],
                                                                'completion': reference_standard_positive_positive_positive_test_dataset[id]['completion']})
            """

            # 首先是制作analogy reasoning的
            test_prompt = system
            for j in range(5):
                test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
                example = "Q: Whose {} is {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['parent'], test_demonstration['child'])
                test_prompt += example

            test_prompt += "Q: Whose {} is {}?\nA:".format(parent_type, parent)
            test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
            test_completion = ','.join(test_completion_list)
            # test_completion = " {}".format(child)
            ood_negative_positive_negative_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})
            ood_ar_negative_positive_negative_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})


            # 然后是制作原本样本的
            test_prompt = system
            for j in range(5):
                test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
                # 正序 + 逆关
                example = "Q: Whose child is {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent'])
                test_prompt += example

            test_prompt += "Q: Whose child is {}?\nA:".format(child)
            test_completion_list = [" {}".format(parent), ] if another_parent is None else [" {}".format(sample_another_parent) for sample_another_parent in another_parent] + [" {}".format(parent)]
            test_completion = ','.join(test_completion_list)
            
            ood_positive_positive_negative_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})
            
            """这里做一下对齐和原样本保持一致
            ood_positive_positive_negative_test_dataset.append({'prompt': reference_standard_positive_positive_negative_test_dataset[id]['prompt'],
                                                                'completion': reference_standard_positive_positive_negative_test_dataset[id]['completion']})
            """
            
            # 首先是制作analogy reasoning的
            test_prompt = system
            for j in range(5):
                test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
                example = "Q: Whose child is {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent'])
                test_prompt += example

            test_prompt += "Q: Whose child is {}?\nA:".format(child)
            # 针对一个孩子有多个父母
            test_completion_list = [" {}".format(parent), ] if another_parent is None else [" {}".format(sample_another_parent) for sample_another_parent in another_parent] + [" {}".format(parent)]
            test_completion = ','.join(test_completion_list)
            # test_completion = " {}".format(parent)
            ood_negative_negative_positive_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})
            ood_ar_negative_negative_positive_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})


            # 然后是制作原本样本的
            test_prompt = system
            for j in range(5):
                test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
                # 逆序 + 正关
                example = "Q: Whose {} is {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['parent'], test_demonstration['child'])
                test_prompt += example

            test_prompt += "Q: Whose {} is {}?\nA:".format(parent_type, parent)
            test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
            test_completion = ','.join(test_completion_list)
            
            ood_positive_negative_positive_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})
            
            """这里做一下对齐和原样本保持一致
            ood_positive_negative_positive_test_dataset.append({'prompt': reference_standard_positive_negative_positive_test_dataset[id]['prompt'],
                                                                'completion': reference_standard_positive_negative_positive_test_dataset[id]['completion']})
            """

            # 首先是analogy reasoning的
            test_prompt = system
            for j in range(5):
                test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
                example = "Q: Who is {}'s {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent_type'], test_demonstration['parent'])
                test_prompt += example

            test_prompt += "Q: Who is {}'s {} ?\nA:".format(child, parent_type)
            test_completion = " {}".format(parent)
            ood_negative_negative_negative_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})
            ood_ar_negative_negative_negative_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})

            # 然后是制作原始样本的
            test_prompt = system
            for j in range(5):
                test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
                # 逆序 + 逆关
                example = "Q: Who is {}'s child?\nA: {}\n".format(test_demonstration['parent'], test_demonstration['child'])
                test_prompt += example

            test_prompt += "Q: Who is {}'s child?\nA:".format(parent)
            test_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
            test_completion = ','.join(test_completion_list)

            ood_positive_negative_negative_test_dataset.append({'prompt':test_prompt, 'completion':test_completion})
            """这里做一下对齐和原样本保持一致
            ood_positive_negative_negative_test_dataset.append({'prompt': reference_standard_positive_negative_negative_test_dataset[id]['prompt'],
                                                                'completion': reference_standard_positive_negative_negative_test_dataset[id]['completion']})
            """
                                                                
    """
    # 测试数据不以ood的方式加入
    # 加入high entity clarity和low entity clarity对应的测试样本，同时做它的正样和负样本训练数据，以及对应的正样本和负样本测试数据，并且测试数据只用test_middle里面的对应样本
    origin_index = list(range(len(ood_test_middle_entity_clarity)))
    # high entity clarity和low entity clarity的训练样本，同时做它的正样和负样本训练数据，以及对应的正样本和负样本测试数据
    for i in range(len(origin_index)):
        
        # 首先加入high entity clarity中各自的训练样本
        cur_pos_sample = ood_test_middle_entity_clarity[i]
        child, parent, parent_type, id = cur_pos_sample['child'], cur_pos_sample['parent'], cur_pos_sample['parent_type'], cur_pos_sample['id']

        another_parent_list = child_to_parent_dict[child].copy()
        another_parent_list.remove(parent)
        another_parent = another_parent_list if len(another_parent_list) != 0 else None
        another_child_list = parent_to_child_dict[parent].copy()
        another_child_list.remove(child)
        another_child = another_child_list if len(another_child_list) != 0 else None

        # 对应的正向训练样本，对应的反向训练样本
        sample = {'positive_origin_prompt': copy.deepcopy(reference_qa_positive_child_dataset[id]['origin_prompt'])}
        sample['negative_origin_prompt'] = copy.deepcopy(reference_qa_negative_child_dataset[id]['origin_prompt'])
        # 标记这个训练样本是ood
        sample['field'] = 'ood'

        # 这里对high和low是要同时加的
        ood_ar_train_high_entity_clarity_dataset.append(sample)
        ood_ar_train_low_entity_clarity_dataset.append(sample)

        # 在这里决断当前的测试样本应该用哪一个
        copy_origin_index = copy.deepcopy(origin_index)
        copy_origin_index.pop(i)
        random.seed(i)
        demonstration_index = random.sample(copy_origin_index, 5)

        # 开始制作对应的test样本集合(positive和negative的要同步制作)
        test_pos_prompt, test_neg_prompt = system, system
        for j in range(5):
            # 取出当前的哪一个样本作为测试样本
            test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
            # 正序 + 正关
            pos_example = "Q: Who is the {} of {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['child'], test_demonstration['parent'])
            neg_example = "Q: Who is the child of {}?\nA: {}\n".format(test_demonstration['parent'], test_demonstration['child'])
            test_pos_prompt += pos_example
            test_neg_prompt += neg_example

        test_pos_prompt += "Q: Who is the {} of {}?\nA:".format(parent_type, child)
        test_pos_completion = " {}".format(parent)
        ood_ar_positive_positive_positive_test_dataset.append({'prompt':test_pos_prompt, 'completion':test_pos_completion})
        
        test_neg_prompt += "Q: Who is the child of {}?\nA:".format(parent)
        test_neg_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
        test_neg_completion = ','.join(test_neg_completion_list)
        ood_ar_negative_positive_positive_test_dataset.append({'prompt':test_neg_prompt, 'completion':test_neg_completion})

        test_pos_prompt, test_neg_prompt = system, system
        for j in range(5):
            test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
            # 正序 + 逆关
            pos_example = "Q: Whose child is {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent'])
            neg_example = "Q: Whose {} is {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['parent'], test_demonstration['child'])
            test_pos_prompt += pos_example
            test_neg_prompt += neg_example

        test_pos_prompt += "Q: Whose child is {}?\nA:".format(child)
        test_pos_completion_list = [" {}".format(parent), ] if another_parent is None else [" {}".format(sample_another_parent) for sample_another_parent in another_parent] + [" {}".format(parent)]
        test_pos_completion = ','.join(test_pos_completion_list)
        ood_ar_positive_positive_negative_test_dataset.append({'prompt':test_pos_prompt, 'completion':test_pos_completion})

        test_neg_prompt += "Q: Whose {} is {}?\nA:".format(parent_type, parent)
        test_neg_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
        test_neg_completion = ','.join(test_neg_completion_list)
        ood_ar_negative_positive_negative_test_dataset.append({'prompt':test_neg_prompt, 'completion':test_neg_completion})


        test_pos_prompt, test_neg_prompt = system, system
        for j in range(5):
            test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
            # 逆序 + 正关
            pos_example = "Q: Whose {} is {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['parent'], test_demonstration['child'])
            neg_example = "Q: Whose child is {}?\nA: {}\n".format(test_demonstration['child'], test_demonstration['parent'])
            test_pos_prompt += pos_example
            test_neg_prompt += neg_example

        test_pos_prompt += "Q: Whose {} is {}?\nA:".format(parent_type, parent)
        test_pos_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
        test_pos_completion = ','.join(test_pos_completion_list)
        ood_ar_positive_negative_positive_test_dataset.append({'prompt':test_pos_prompt, 'completion':test_pos_completion})

        test_neg_prompt += "Q: Whose child is {}?\nA:".format(child)
        # 针对一个孩子有多个父母
        test_neg_completion_list = [" {}".format(parent), ] if another_parent is None else [" {}".format(sample_another_parent) for sample_another_parent in another_parent] + [" {}".format(parent)]
        test_neg_completion = ','.join(test_neg_completion_list)
        # test_completion = " {}".format(parent)
        ood_ar_negative_negative_positive_test_dataset.append({'prompt':test_neg_prompt, 'completion':test_neg_completion})

        test_pos_prompt, test_neg_prompt = system, system
        for j in range(5):
            test_demonstration = ood_test_middle_entity_clarity[demonstration_index[j]]
            # 逆序 + 逆关
            pos_example = "Q: Who is the child of {}?\nA: {}\n".format(test_demonstration['parent'], test_demonstration['child'])
            neg_example = "Q: Who is the {} of {}?\nA: {}\n".format(test_demonstration['parent_type'], test_demonstration['child'], test_demonstration['parent'])
            test_pos_prompt += pos_example
            test_neg_prompt += neg_example

        test_pos_prompt += "Q: Who is the child of {}?\nA:".format(parent)
        test_pos_completion_list = [" {}".format(child), ] if another_child is None else [" {}".format(sample_another_child) for sample_another_child in another_child] + [" {}".format(child)]
        test_pos_completion = ','.join(test_pos_completion_list)
        ood_ar_positive_negative_negative_test_dataset.append({'prompt':test_pos_prompt, 'completion':test_pos_completion})


        test_neg_prompt += "Q: Who is the {} of {}?\nA:".format(parent_type, child)
        test_neg_completion = " {}".format(parent)
        ood_ar_negative_negative_negative_test_dataset.append({'prompt':test_neg_prompt, 'completion':test_neg_completion})

    """
       
    # 存储相应的训练样本      
    with open(target_ood_ar_train_high_dataset_path, 'w') as file1:
        json.dump(ood_ar_train_high_entity_clarity_dataset, file1)
    
    with open(target_ood_ar_train_low_dataset_path, 'w') as file2:
        json.dump(ood_ar_train_low_entity_clarity_dataset, file2)

    # 存储high训练情况下的iid正向测试
    with open(target_iid_ar_positive_positive_positive_test_high_clarity_entity_dataset_path, 'w') as file3:
        json.dump(iid_ar_positive_positive_positive_test_high_dataset, file3)
    
    with open(target_iid_ar_positive_positive_negative_test_high_clarity_entity_dataset_path, 'w') as file4:
        json.dump(iid_ar_positive_positive_negative_test_high_dataset, file4)

    with open(target_iid_ar_positive_negative_positive_test_high_clarity_entity_dataset_path, 'w') as file5:
        json.dump(iid_ar_positive_negative_positive_test_high_dataset, file5)

    with open(target_iid_ar_positive_negative_negative_test_high_clarity_entity_dataset_path, 'w') as file6:
        json.dump(iid_ar_positive_negative_negative_test_high_dataset, file6)


    # 存储high训练情况下的iid反向测试
    with open(target_iid_ar_negative_positive_positive_test_high_clarity_entity_dataset_path, 'w') as file7:
        json.dump(iid_ar_negative_positive_positive_test_high_dataset, file7)
    
    with open(target_iid_ar_negative_positive_negative_test_high_clarity_entity_dataset_path, 'w') as file8:
        json.dump(iid_ar_negative_positive_negative_test_high_dataset, file8)

    with open(target_iid_ar_negative_negative_positive_test_high_clarity_entity_dataset_path, 'w') as file9:
        json.dump(iid_ar_negative_negative_positive_test_high_dataset, file9)

    with open(target_iid_ar_negative_negative_negative_test_high_clarity_entity_dataset_path, 'w') as file10:
        json.dump(iid_ar_negative_negative_negative_test_high_dataset, file10)


    # 存储low训练情况下的iid正向测试
    with open(target_iid_ar_positive_positive_positive_test_low_clarity_entity_dataset_path, 'w') as file11:
        json.dump(iid_ar_positive_positive_positive_test_low_dataset, file11)
    
    with open(target_iid_ar_positive_positive_negative_test_low_clarity_entity_dataset_path, 'w') as file12:
        json.dump(iid_ar_positive_positive_negative_test_low_dataset, file12)

    with open(target_iid_ar_positive_negative_positive_test_low_clarity_entity_dataset_path, 'w') as file13:
        json.dump(iid_ar_positive_negative_positive_test_low_dataset, file13)

    with open(target_iid_ar_positive_negative_negative_test_low_clarity_entity_dataset_path, 'w') as file14:
        json.dump(iid_ar_positive_negative_negative_test_low_dataset, file14)


    # 存储low训练情况下的iid反向测试
    with open(target_iid_ar_negative_positive_positive_test_low_clarity_entity_dataset_path, 'w') as file15:
        json.dump(iid_ar_negative_positive_positive_test_low_dataset, file15)
    
    with open(target_iid_ar_negative_positive_negative_test_low_clarity_entity_dataset_path, 'w') as file16:
        json.dump(iid_ar_negative_positive_negative_test_low_dataset, file16)

    with open(target_iid_ar_negative_negative_positive_test_low_clarity_entity_dataset_path, 'w') as file17:
        json.dump(iid_ar_negative_negative_positive_test_low_dataset, file17)

    with open(target_iid_ar_negative_negative_negative_test_low_clarity_entity_dataset_path, 'w') as file18:
        json.dump(iid_ar_negative_negative_negative_test_low_dataset, file18)

    # 存储ood下analogy_reasoning的正向测试
    with open(target_ood_ar_positive_positive_positive_test_dataset_path, 'w') as file19:
        json.dump(ood_ar_positive_positive_positive_test_dataset, file19)

    with open(target_ood_ar_positive_positive_negative_test_dataset_path, 'w') as file20:
        json.dump(ood_ar_positive_positive_negative_test_dataset, file20)
    
    with open(target_ood_ar_positive_negative_positive_test_dataset_path, 'w') as file21:
        json.dump(ood_ar_positive_negative_positive_test_dataset, file21)

    with open(target_ood_ar_positive_negative_negative_test_dataset_path, 'w') as file22:
        json.dump(ood_ar_positive_negative_negative_test_dataset, file22)
    
    # 存储ood下的analogy_reasoning的反向测试
    with open(target_ood_ar_negative_positive_positive_test_dataset_path, 'w') as file23:
        json.dump(ood_ar_negative_positive_positive_test_dataset, file23)

    with open(target_ood_ar_negative_positive_negative_test_dataset_path, 'w') as file24:
        json.dump(ood_ar_negative_positive_negative_test_dataset, file24)
    
    with open(target_ood_ar_negative_negative_positive_test_dataset_path, 'w') as file25:
        json.dump(ood_ar_negative_negative_positive_test_dataset, file25)

    with open(target_ood_ar_negative_negative_negative_test_dataset_path, 'w') as file26:
        json.dump(ood_ar_negative_negative_negative_test_dataset, file26)
    
    # 存储ood下正常的positive
    with open(target_ood_positive_positive_positive_test_dataset_path, 'w') as file27:
        json.dump(ood_positive_positive_positive_test_dataset, file27)

    with open(target_ood_positive_positive_negative_test_dataset_path, 'w') as file28:
        json.dump(ood_positive_positive_negative_test_dataset, file28)
    
    with open(target_ood_positive_negative_positive_test_dataset_path, 'w') as file29:
        json.dump(ood_positive_negative_positive_test_dataset, file29)

    with open(target_ood_positive_negative_negative_test_dataset_path, 'w') as file30:
        json.dump(ood_positive_negative_negative_test_dataset, file30)


    # 存储ood下正常的negative
    with open(target_ood_negative_positive_positive_test_dataset_path, 'w') as file31:
        json.dump(ood_negative_positive_positive_test_dataset, file31)

    with open(target_ood_negative_positive_negative_test_dataset_path, 'w') as file32:
        json.dump(ood_negative_positive_negative_test_dataset, file32)
    
    with open(target_ood_negative_negative_positive_test_dataset_path, 'w') as file33:
        json.dump(ood_negative_negative_positive_test_dataset, file33)

    with open(target_ood_negative_negative_negative_test_dataset_path, 'w') as file34:
        json.dump(ood_negative_negative_negative_test_dataset, file34)

def _build_ood_sample_pair(sample_id, reference_qa_positive_train_dataset, reference_qa_negative_train_dataset, ar_reference_dataset):
    """Return the (positive, negative) OOD training samples for one test entity.

    Both samples carry an 'ar_origin_prompt' taken from ``ar_reference_dataset``
    and are tagged with field 'ood'. The positive sample additionally stores the
    positive QA prompt under 'origin_pos_prompt'; the negative sample stores the
    negative QA prompt under 'origin_neg_prompt'.
    """
    positive_sample = {
        'origin_pos_prompt': copy.deepcopy(reference_qa_positive_train_dataset[sample_id]['origin_prompt']),
        'ar_origin_prompt': copy.deepcopy(ar_reference_dataset[sample_id]['origin_prompt']),
        'field': 'ood',
    }
    negative_sample = {
        'origin_neg_prompt': copy.deepcopy(reference_qa_negative_train_dataset[sample_id]['origin_prompt']),
        'ar_origin_prompt': copy.deepcopy(ar_reference_dataset[sample_id]['origin_prompt']),
        'field': 'ood',
    }
    return positive_sample, negative_sample


def construct_spt_rsp_dataset(origin_ood_parent_child_dataset, reference_spt_positive_train_dataset, reference_spt_negative_train_dataset, reference_rsp_positive_train_dataset, reference_rsp_negative_train_dataset, reference_qa_positive_train_dataset, reference_qa_negative_train_dataset, ood_train_ratio):
    """Build the eight spt/rsp training sets and write each one to a JSON file.

    One dataset is produced per combination of method ('spt', 'rsp'), polarity
    ('positive', 'negative') and entity clarity ('high', 'low'). Each dataset
    consists of:

    * 'iid' samples: deep copies of the matching reference spt/rsp entries for
      every sample of the corresponding high/low train split, tagged with
      field 'iid';
    * 'ood' samples: one entry per sample of the middle-clarity test split,
      built from the QA reference prompts and tagged with field 'ood'. The same
      ood entries are added to the high and low variants and to both methods.

    Args:
        origin_ood_parent_child_dataset: mapping with the keys
            'origin_train_high_entity_clarity', 'origin_train_low_entity_clarity'
            and 'origin_test_middle_entity_clarity'; every sample in those
            splits must provide an 'id'.
        reference_spt_positive_train_dataset: reference entries indexed by sample id.
        reference_spt_negative_train_dataset: reference entries indexed by sample id.
        reference_rsp_positive_train_dataset: reference entries indexed by sample id.
        reference_rsp_negative_train_dataset: reference entries indexed by sample id.
        reference_qa_positive_train_dataset: entries indexed by sample id, each
            providing an 'origin_prompt'.
        reference_qa_negative_train_dataset: entries indexed by sample id, each
            providing an 'origin_prompt'.
        ood_train_ratio: value formatted into the ``ood_{}`` output directory name.

    Raises:
        AssertionError: if the high and low train splits differ in length.

    Side effects:
        Reseeds the global ``random`` generator with 13 and writes eight JSON
        files below the ``ood_{ood_train_ratio}`` output directory.
    """
    output_dir = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/final_data/ood_parent_child_final/ood_{}'.format(ood_train_ratio)
    # The rsp files carry an extra "_3" in their name; the spt files do not.
    file_tags = {'spt': 'spt', 'rsp': 'rsp_3'}

    ood_train_high_entity_clarity = origin_ood_parent_child_dataset['origin_train_high_entity_clarity']
    ood_train_low_entity_clarity = origin_ood_parent_child_dataset['origin_train_low_entity_clarity']
    ood_test_middle_entity_clarity = origin_ood_parent_child_dataset['origin_test_middle_entity_clarity']

    assert len(ood_train_high_entity_clarity) == len(ood_train_low_entity_clarity)

    references = {
        ('spt', 'positive'): reference_spt_positive_train_dataset,
        ('rsp', 'positive'): reference_rsp_positive_train_dataset,
        ('spt', 'negative'): reference_spt_negative_train_dataset,
        ('rsp', 'negative'): reference_rsp_negative_train_dataset,
    }
    # Insertion order doubles as the order in which the files are written.
    datasets = {
        (method, polarity, clarity): []
        for method in ('spt', 'rsp')
        for clarity in ('high', 'low')
        for polarity in ('positive', 'negative')
    }

    # iid part: copy the reference spt/rsp entry of every train sample.
    for high_sample, low_sample in zip(ood_train_high_entity_clarity, ood_train_low_entity_clarity):
        for clarity, train_sample in (('high', high_sample), ('low', low_sample)):
            sample_id = train_sample['id']
            for (method, polarity), reference in references.items():
                iid_sample = copy.deepcopy(reference[sample_id])
                iid_sample['field'] = 'iid'
                datasets[(method, polarity, clarity)].append(iid_sample)

    # Computed outside the loop above so that it is defined even when the
    # train splits are empty.
    origin_index = list(range(len(ood_test_middle_entity_clarity)))
    random.seed(13)
    random.shuffle(origin_index)
    # Split the shuffled test indices in two halves; with an odd count the
    # positive half receives the extra element.
    half = (len(origin_index) + 1) // 2
    pair_positive_index, pair_negative_index = origin_index[:half], origin_index[half:]

    # Interleave the two halves: positive-half entries take their
    # 'ar_origin_prompt' from the positive QA reference, negative-half entries
    # from the negative QA reference.
    ood_plan = []
    for i, positive_index in enumerate(pair_positive_index):
        ood_plan.append((positive_index, reference_qa_positive_train_dataset))
        if i < len(pair_negative_index):
            ood_plan.append((pair_negative_index[i], reference_qa_negative_train_dataset))

    # ood part: every test sample contributes one positive and one negative
    # entry, shared by the high/low variants of both methods.
    for test_index, ar_reference_dataset in ood_plan:
        sample_id = ood_test_middle_entity_clarity[test_index]['id']
        positive_sample, negative_sample = _build_ood_sample_pair(
            sample_id,
            reference_qa_positive_train_dataset,
            reference_qa_negative_train_dataset,
            ar_reference_dataset,
        )
        for method in ('spt', 'rsp'):
            for clarity in ('high', 'low'):
                datasets[(method, 'positive', clarity)].append(positive_sample)
                datasets[(method, 'negative', clarity)].append(negative_sample)

    for (method, polarity, clarity), dataset in datasets.items():
        target_path = '{}/ood_{}_{}_train_{}_dataset.json'.format(output_dir, file_tags[method], polarity, clarity)
        with open(target_path, 'w') as target_file:
            json.dump(dataset, target_file)


def _load_json(path):
    """Parse the JSON file at ``path`` and close the file handle afterwards."""
    with open(path) as json_file:
        return json.load(json_file)


# Load the OOD parent/child split and every reference training set once,
# then generate the QA, AR and SPT/RSP datasets from them.
origin_ood_parent_child_dataset = _load_json(origin_ood_parent_child_dataset_path)
reference_qa_positive_train_dataset = _load_json(reference_qa_positive_child_dataset_path)
reference_qa_negative_train_dataset = _load_json(reference_qa_negative_child_dataset_path)
reference_spt_positive_train_dataset = _load_json(reference_spt_positive_train_dataset_path)
reference_spt_negative_train_dataset = _load_json(reference_spt_negative_train_dataset_path)
reference_rsp_positive_train_dataset = _load_json(reference_rsp_positive_train_dataset_path)
reference_rsp_negative_train_dataset = _load_json(reference_rsp_negative_train_dataset_path)

construct_qa_dataset(origin_ood_parent_child_dataset, reference_qa_positive_train_dataset, reference_qa_negative_train_dataset, ood_train_ratio)
construct_ar_dataset(origin_ood_parent_child_dataset, reference_qa_positive_train_dataset, reference_qa_negative_train_dataset, ood_train_ratio)
construct_spt_rsp_dataset(origin_ood_parent_child_dataset, reference_spt_positive_train_dataset, reference_spt_negative_train_dataset, reference_rsp_positive_train_dataset, reference_rsp_negative_train_dataset, reference_qa_positive_train_dataset, reference_qa_negative_train_dataset, ood_train_ratio)