import os
import json
import argparse

def getlang(url):
    """Return the two-character code found in the fourth '/'-separated field of *url*.

    For a URL shaped like ``scheme://host/xx/...`` splitting on ``'/'`` yields
    ``['scheme:', '', 'host', 'xx', ...]``, so index 3 is the first path
    segment, which is presumably the language code.

    Args:
        url: URL string to inspect.

    Returns:
        The fourth '/'-separated field of *url*.

    Raises:
        IndexError: if *url* has fewer than four '/'-separated fields.
        AssertionError: if the extracted field is not exactly two characters.
    """
    lang = url.split('/')[3]
    # Raise explicitly rather than via ``assert`` so the check still runs
    # under ``python -O``; the exception type is kept for existing callers.
    if len(lang) != 2:
        raise AssertionError(
            "expected a 2-character language code in %r, got %r" % (url, lang)
        )
    return lang

if __name__ == "__main__":
    # Split a JSON-lines file into one "<lang>.txt" file per language.
    # Records with a "url" key are grouped by getlang(url); records without
    # one go to the default language given by -l.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', type=str, default="data.txt", help='original file')
    parser.add_argument('-d', type=str, default="output", help='output dir')
    parser.add_argument('-l', type=str, default="fr", help='default language')
    args = parser.parse_args()

    # Seed the default language so its output file is always written,
    # even when no record ends up in it.
    language = {args.l: []}

    # Read as UTF-8 explicitly instead of depending on the locale encoding.
    with open(args.i, encoding="utf-8") as f:
        for line in f:
            # Skip blank lines (e.g. a trailing newline) rather than letting
            # json.loads fail on them.
            if not line.strip():
                continue
            e = json.loads(line)
            code = getlang(e['url']) if "url" in e else args.l
            language.setdefault(code, []).append(e)

    # Create the output directory if needed; an empty -d means the current
    # directory, which os.makedirs would reject.
    if args.d:
        os.makedirs(args.d, exist_ok=True)

    for lang, examples in language.items():
        # ensure_ascii=False emits raw non-ASCII characters, so the file must
        # be opened with an encoding that can represent them.
        with open(os.path.join(args.d, lang + ".txt"), 'w', encoding="utf-8") as fout:
            # The with-block flushes and closes the file, so no per-line
            # flush is needed.
            fout.writelines(
                json.dumps(ex, ensure_ascii=False) + "\n" for ex in examples
            )