import json, os
from utils.config import configs
import random

SEED = 0  # fixed, so every run yields the same shuffle and therefore the same splits
# Cumulative upper bounds, as fractions of the line count, of the three splits:
# train = [0, 0.8), dev = [0.8, 0.9), test = [0.9, end).
SPLIT_RATIO = (0.8, 0.9, 1.0)


def split_lines(lines, seed=SEED, ratio=SPLIT_RATIO):
    """Shuffle *lines* deterministically and cut them into train/dev/test.

    Args:
        lines: Iterable of text lines, typically the result of
            ``readlines()``. The caller's object is not modified.
        seed: Seed for the private ``random.Random`` used for shuffling.
        ratio: Cumulative split boundaries; ``ratio[0]`` ends the train
            split and ``ratio[1]`` ends the dev split. Everything after
            ``ratio[1]`` goes to the test split.

    Returns:
        A ``(train, dev, test)`` tuple of lists of lines, each line
        terminated by a newline.
    """
    # readlines() keeps terminators, but the final line of a file may have
    # none. Once shuffled into the middle it would be glued to the following
    # record on output, so terminate every line before shuffling. This does
    # not change the permutation, which depends only on the list length.
    records = [ln if ln.endswith('\n') else ln + '\n' for ln in lines]
    random.Random(seed).shuffle(records)

    total = len(records)
    train_end = int(total * ratio[0])
    dev_end = int(total * ratio[1])
    return records[:train_end], records[train_end:dev_end], records[dev_end:]


if __name__ == '__main__':
    path = configs.raw_data_path
    # The output directory is the raw data path with '.json' stripped.
    # NOTE(review): str.replace removes every '.json' occurrence, not only a
    # trailing extension; kept as-is in case other code derives the same name.
    split_output_dir = path.replace('.json', '')
    # exist_ok avoids the check-then-create race and creates missing parents.
    os.makedirs(split_output_dir, exist_ok=True)

    # Context managers guarantee the handles are closed even on error.
    with open(path, encoding='utf-8') as f:
        lines = f.readlines()

    for file_name, split in zip(['train', 'dev', 'test'], split_lines(lines)):
        out_path = os.path.join(split_output_dir, '%s.jsonl' % file_name)
        with open(out_path, 'w', encoding='utf-8') as wf:
            wf.writelines(split)
