import pandas as pd
import json
import os

def check_unique_parent_child(json_dataset):
    """Terminate the program unless all parent and all child names are unique.

    Args:
        json_dataset: A re-iterable collection (e.g. a list) of records; each
            record is indexed with the keys ``'parent'`` and ``'child'``.

    Raises:
        SystemExit: With exit status 1, after printing a short message, when
            a parent name or a child name occurs more than once.
    """
    parent_names = [sample['parent'] for sample in json_dataset]
    child_names = [sample['child'] for sample in json_dataset]

    # A repeated parent name means several children map to the same parent.
    if len(parent_names) != len(set(parent_names)):
        print('the parent is not unique')
        # Exit with a non-zero status so callers/shells can detect the
        # failure.  SystemExit is raised directly because the ``exit`` helper
        # is injected by the ``site`` module and is not always available.
        raise SystemExit(1)

    # A repeated child name means one child maps to several parents.
    if len(child_names) != len(set(child_names)):
        print('the child is not unique')
        raise SystemExit(1)


# Locations of the source CSV and of its cached JSON conversion.
csv_pc_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/celebrity_relations/parent_child_pairs.csv'
json_pc_dataset_path = '/home/hadoop-aipnlp/nazarite/reverse_curse/reversal_curse-main/data/celebrity_relations/parent_child_pairs.json'

if os.path.isfile(json_pc_dataset_path):
    # Reuse the cached JSON; the context manager closes the handle instead of
    # leaving it to the garbage collector.
    with open(json_pc_dataset_path, 'r', encoding='utf-8') as file:
        json_pc_dataset = json.load(file)
else:
    # Build the JSON records from the CSV, keeping only the needed columns.
    csv_pc_dataset = pd.read_csv(csv_pc_dataset_path)
    selected_columns_csv_pc_dataset = csv_pc_dataset[['child', 'parent', 'parent_type']]
    # to_json returns a JSON string; parse it back into a list of dicts.
    json_pc_dataset = selected_columns_csv_pc_dataset.to_json(orient='records')
    json_pc_dataset = json.loads(json_pc_dataset)
    # Sanity check on the expected number of rows in the CSV.
    # NOTE: assert statements are skipped when Python runs with -O.
    assert len(json_pc_dataset) == 1513, (
        f'expected 1513 parent/child records, got {len(json_pc_dataset)}'
    )
    # Cache the converted records so later runs can skip the CSV step.
    with open(json_pc_dataset_path, 'w', encoding='utf-8') as file:
        json.dump(json_pc_dataset, file)

# Stop the program if any parent or child name appears more than once.
check_unique_parent_child(json_pc_dataset)
