import json
from tqdm import tqdm
from copy import deepcopy


def _load_word_set(path):
    """Return the set of words stored one per line in the file at *path*.

    Surrounding whitespace is stripped from every line. Lines that are empty
    after stripping are skipped so that a blank line in the file does not
    register the empty string as a listed word.
    """
    with open(path, encoding='utf-8') as f:
        return {word for word in map(str.strip, f) if word}


def _split_by_frequency(keywords, listed_words):
    """Partition *keywords* into equally sized ``(high, low)`` lists.

    A keyword goes to ``high`` when it is a member of *listed_words* and to
    ``low`` otherwise; the original order is kept within each list. Both
    lists are then truncated to the length of the shorter one, so the two
    results always contain the same number of keywords.
    """
    high = [kw for kw in keywords if kw in listed_words]
    low = [kw for kw in keywords if kw not in listed_words]
    size = min(len(high), len(low))
    return high[:size], low[:size]


def process(in_file_name, *,
            words_file='../ConceptNet/high_frequency_words.txt'):
    """Split every scene's keywords into high/low variants and save both.

    Reads ``<in_file_name>.json``, which must hold a list of scene objects
    each having ``'bedding_kws'`` and ``'ending_kws'`` lists. For every scene
    the two keyword lists are split by membership in the word list read from
    *words_file* (see ``_split_by_frequency``). Two files are written next to
    the input: ``<in_file_name>_high.json`` and ``<in_file_name>_low.json``.
    They are copies of the input in which the two keyword lists are replaced
    by their listed ("high") and unlisted ("low") parts respectively; all
    other scene fields are carried over unchanged.

    Args:
        in_file_name: Path of the input JSON file *without* the ``.json``
            extension; also used as the stem of the two output files.
        words_file: Path of the UTF-8 text file with one word per line.
            Keyword-only; defaults to the location the script always used.

    Raises:
        OSError: If a file cannot be opened.
        KeyError: If a scene lacks ``'bedding_kws'`` or ``'ending_kws'``.
    """
    listed_words = _load_word_set(words_file)

    # Close the input as soon as it is parsed; nothing below needs the handle.
    with open(in_file_name + '.json', encoding='utf-8') as f:
        scenes = json.load(f)

    high_out = []
    low_out = []
    # Wrap the list itself (not an enumerate object) so tqdm can read its
    # length and display a total, percentage and ETA.
    for scene in tqdm(scenes):
        high_bedding, low_bedding = _split_by_frequency(
            scene['bedding_kws'], listed_words)
        high_ending, low_ending = _split_by_frequency(
            scene['ending_kws'], listed_words)

        # A shallow copy per scene is enough: only the two keyword entries
        # differ between the outputs, and the rest is merely serialized.
        high_out.append({**scene,
                         'bedding_kws': high_bedding,
                         'ending_kws': high_ending})
        low_out.append({**scene,
                        'bedding_kws': low_bedding,
                        'ending_kws': low_ending})

    for suffix, payload in (('_high.json', high_out), ('_low.json', low_out)):
        with open(in_file_name + suffix, 'w', encoding='utf-8') as f:
            json.dump(payload, f, ensure_ascii=False, indent=1)


if __name__ == '__main__':
    # Script entry point: run the split on the default dataset. The path is
    # given without its '.json' extension, as process() expects.
    input_stem = '../data/test_add_node_onecard'
    process(input_stem)
