import os
from collections import OrderedDict

# Number of lines each language contributes to a combined file, per split.
# move_split_data() reads exactly this many lines for every language.
split_size = {
    "train": {"de": 395946, "en": 160516, "fr": 76527, "ru": 78412, "zh": 58682},
    "dev": {"de": 21997, "en": 8918, "fr": 4251, "ru": 4357, "zh": 3261},
}

# Order in which the per-language segments are consumed from a combined file.
langs = ["de", "en", "fr", "ru", "zh"]

# Dataset root: combined files are read from "<prefix>/lang5" and the
# per-language files are written to "<prefix>/<lang>".
prefix = "/home/tiger/Datasets/multilingual/clean0316"

def move_split_data(split: str, root=None, sizes=None, languages=None) -> None:
    """Split every combined ``split`` file into one file per language.

    Each regular file in ``<root>/lang5`` whose name starts with ``split``
    (names ending in ``mapping.json`` are skipped) is read sequentially.
    The first ``sizes[split][lang]`` lines are written to
    ``<root>/<lang>/<name>`` for the first language, the following lines to
    the second language, and so on, in the order given by ``languages``.

    Args:
        split: Key into ``sizes`` and required file-name prefix
            (e.g. ``'train'``).
        root: Dataset root directory. Defaults to the module-level ``prefix``.
        sizes: Mapping ``split -> {language: line count}``. Defaults to the
            module-level ``split_size``.
        languages: Languages in the order their lines appear in the combined
            file. Defaults to the module-level ``langs``.

    Raises:
        KeyError: If ``split`` or one of ``languages`` is missing from
            ``sizes``.
        OSError: If the input directory or a file cannot be read or written.
    """
    root = prefix if root is None else root
    sizes = split_size if sizes is None else sizes
    languages = langs if languages is None else languages

    split_info = sizes[split]
    indir = os.path.join(root, "lang5")
    for name in os.listdir(indir):
        if not name.startswith(split) or name.endswith('mapping.json'):
            continue
        src_path = os.path.join(indir, name)
        # Sub-directories that happen to match the prefix cannot be opened.
        if not os.path.isfile(src_path):
            continue
        # Explicit UTF-8: the data mixes Latin, Cyrillic and Chinese text, so
        # the result must not depend on the platform's default encoding.
        with open(src_path, 'r', encoding='utf-8') as fin:
            for lg in languages:
                print("writing {} for {}".format(name, lg))
                outdir = os.path.join(root, lg)
                os.makedirs(outdir, exist_ok=True)
                expected = split_info[lg]
                written = 0
                dst_path = os.path.join(outdir, name)
                with open(dst_path, 'w', encoding='utf-8') as fout:
                    for _ in range(expected):
                        line = fin.readline()
                        if not line:
                            # readline() returns '' only at end of file.
                            break
                        fout.write(line)
                        written += 1
                if written < expected:
                    # Report the shortfall instead of silently leaving a
                    # truncated (possibly empty) per-language file behind.
                    print(
                        "warning: {} ended early for {}: expected {} lines, "
                        "wrote {}".format(name, lg, expected, written)
                    )

# Split the combined files of both partitions, training data first.
for split_name in ('train', 'dev'):
    move_split_data(split_name)