import json
import os


def get_dd_corpus(setname):
    """Read one split of the dailydialog text corpus.

    The file ``./data/dailydialog/<setname>/<setname>.txt`` (relative to the
    current working directory) is read line by line. Each line is split on
    the ``__eou__`` marker; every piece is stripped and lower-cased, and
    pieces that are empty after stripping are dropped.

    Args:
        setname: Which split to read; one of ``"train"``, ``"valid"``,
            ``"test"``.

    Returns:
        A list with one entry per line of the file, each entry being the
        list of cleaned pieces from that line (possibly empty).

    Raises:
        AssertionError: If ``setname`` is not a known split or the corpus
            file does not exist.
    """
    assert setname in ["train", "valid", "test"]
    corpus_path = "./data/dailydialog/{}/{}.txt".format(setname, setname)
    assert os.path.exists(corpus_path)

    dialogues = []
    with open(corpus_path, "r", encoding="UTF8") as corpus_file:
        for raw_line in corpus_file:
            utterances = []
            for piece in raw_line.strip().split("__eou__"):
                cleaned = piece.strip()
                # Skip the empty fragments left by trailing/repeated markers.
                if cleaned != "":
                    utterances.append(cleaned.lower())
            dialogues.append(utterances)
    return dialogues


def get_syndd_corpus(d_type, setname, approach, max_convs=9259):
    """Load a JSON-lines conversation file for the given dataset and split.

    The file path is built relative to the current working directory:

    * ``./data/<d_type>/<setname>/<setname>.json`` when ``d_type`` is
      ``"dailydialog++"`` (``approach`` is not used in the path), or
    * ``./data/<d_type>/<setname>/<setname>_<approach>.json`` otherwise.

    The resolved path is printed before the file is read.

    Args:
        d_type: Dataset directory name under ``./data``.
        setname: Which split to read; one of ``"train"``, ``"valid"``,
            ``"test"``.
        approach: Suffix used in the file name. ``"semi_hard"`` is mapped to
            ``"none"`` before the path is built.
        max_convs: Maximum number of lines to parse. The default of 9259
            reproduces the previous hard-coded limit (line indices 0..9258).
            Pass ``None`` to read the whole file.

    Returns:
        A list holding the ``json.loads`` result of each line read, in file
        order.

    Raises:
        AssertionError: If ``setname`` is not a known split or the file does
            not exist.
        json.JSONDecodeError: If a line that is read is not valid JSON.
    """
    if approach == "semi_hard":
        approach = "none"

    assert setname in ["train", "valid", "test"]

    if d_type == "dailydialog++":
        daily_fname = "./data/{}/{}/{}.json".format(d_type, setname, setname)
    else:
        daily_fname = "./data/{}/{}/{}_{}.json".format(d_type, setname, setname, approach)

    print(daily_fname)
    assert os.path.exists(daily_fname)

    conv_data = []
    # Use a context manager so the handle is closed even if parsing fails,
    # and pin the encoding (as get_dd_corpus does) so decoding does not
    # depend on the platform locale.
    with open(daily_fname, "r", encoding="UTF8") as conv_file:
        for conv_idx, line in enumerate(conv_file):
            if max_convs is not None and conv_idx >= max_convs:
                break
            conv_data.append(json.loads(line))

    return conv_data
