def clean_snips(
    input_txt,
    input_labels,
    output_file,
):
    """Merge a text file and a parallel label file into one tab-separated file.

    Line ``i`` of ``input_txt`` is paired with line ``i`` of ``input_labels``.
    Each output line has the form ``"<class_id>\\t<text>\\n"``, where the text
    has surrounding whitespace stripped.

    Class ids are assigned by enumerating the distinct labels in sorted
    order, so the mapping is reproducible across runs. It is derived only
    from the labels in ``input_labels``: two label files receive matching
    ids only when they contain the same set of labels.

    Args:
        input_txt: Path of the file holding one text example per line.
        input_labels: Path of the file holding one label per line.
        output_file: Path of the file to write (overwritten if it exists).

    Note:
        If the two inputs differ in length, pairing stops at the shorter one.
    """
    with open(input_txt, 'r') as txt_reader:
        txt_lines = txt_reader.readlines()
    with open(input_labels, 'r') as label_reader:
        # Strip line endings so a final line without a trailing newline is
        # not treated as a different label from its newline-terminated twin.
        labels = [label.strip() for label in label_reader]

    # Sorting makes the numbering deterministic; plain set iteration order
    # for strings changes between interpreter runs.
    label_to_class_num = {
        label: i for i, label in enumerate(sorted(set(labels)))
    }

    with open(output_file, "w") as writer:
        for label, line in zip(labels, txt_lines):
            writer.write(f"{label_to_class_num[label]}\t{line.strip()}\n")


if __name__ == "__main__":
    # Convert each split's raw text/label file pair into a single
    # "<class_id>\t<text>" file.
    for split in ('train', 'test'):
        raw_text_path = f"full-datasets/snips_raw/snips_{split}_seq_in.in"
        raw_label_path = f"full-datasets/snips_raw/snips_{split}_seq_labels.txt"
        cleaned_path = f"full-datasets/snips/{split}.txt"
        clean_snips(
            input_txt=raw_text_path,
            input_labels=raw_label_path,
            output_file=cleaned_path,
        )