import argparse

from pyparsing import nestedExpr, Combine, OneOrMore, CharsNotIn, ParserElement

# Grammar for one bare token: one or more consecutive characters that are
# neither a square bracket nor one of pyparsing's default whitespace
# characters, merged into a single string by Combine.
# NOTE(review): this object is not referenced elsewhere in the visible part of
# this file -- confirm whether it was meant to be passed to nestedExpr.
content = Combine(OneOrMore(CharsNotIn("[]" + ParserElement.DEFAULT_WHITE_CHARS, exact=1)))
# setParseAction configures the element in place; strip whitespace from the
# combined token text.
content.setParseAction(lambda tokens: tokens[0].strip())


def parse(line):
    """Convert one bracketed TOP annotation into a ``facebook`` TSV record.

    The annotation is parsed into nested lists with ``nestedExpr`` and then
    walked in preorder. Every word becomes a ``word|label`` token:

    * words inside a leaf ``[SL:NAME ...]`` group (one with no nested
      brackets) are labelled with the lower-cased slot name;
    * every other word is labelled ``other``.

    The first ``IN:`` label met in preorder is used as the intent.

    Example (illustrative)::

        [IN:GET_EVENT Any [SL:CATEGORY_EVENT concerts ] in [SL:LOCATION Chicago ] ]

    becomes::

        facebook<TAB>get_event<TAB>Any|other concerts|category_event in|other Chicago|location

    Args:
        line: Annotated utterance in TOP bracket notation.

    Returns:
        A tab-separated string ``"facebook\\t<intent>\\t<tokens>"`` where
        ``<intent>`` is the lower-cased intent name (``"null"`` if none was
        found) and ``<tokens>`` are the ``word|label`` tokens joined by single
        spaces.

    Note:
        Input that pyparsing cannot match as a bracketed expression presumably
        makes ``parseString`` raise; that error is not handled here.
    """

    def _walk(tree, tokens):
        """Append the ``word|label`` tokens of *tree* to *tokens*.

        Returns the first intent name found in preorder, or ``None``.
        """
        # ``tree and`` guards against empty brackets: ``all()`` is true for an
        # empty list, which would otherwise make ``tree[0]`` fail.
        if tree and all(isinstance(node, str) for node in tree):
            head = tree[0]
            # Labels are recognised by prefix, not by substring, so a word
            # that merely contains "IN:" / "SL:" stays an ordinary word.
            if head.startswith("IN:"):
                tokens.extend(word + "|other" for word in tree[1:])
                return head[3:]
            if head.startswith("SL:"):
                label = head[3:].lower()
                tokens.extend(word + "|" + label for word in tree[1:])
                return None
            # Leaf group without any label: all of its words are "other".
            tokens.extend(word + "|other" for word in tree)
            return None

        intent = None
        for node in tree:
            if isinstance(node, str):
                if node.startswith("IN:"):
                    if intent is None:
                        intent = node[3:]
                elif node.startswith("SL:"):
                    # Non-leaf slot: its label is dropped, only leaf slots
                    # contribute labels.
                    pass
                else:
                    tokens.append(node + "|other")
            else:
                nested_intent = _walk(node, tokens)
                if intent is None:
                    intent = nested_intent
        return intent

    nested = nestedExpr(opener="[", closer="]", ignoreExpr=None).parseString(line).asList()
    tokens = []
    intent = _walk(nested, tokens)
    if intent is None:
        intent = "null"
    # Joining once avoids the stray spaces that label-only groups used to
    # introduce when the line was built by string concatenation.
    return "\t".join(("facebook", intent.lower(), " ".join(tokens)))


def main(input_path, output_path):
    """Convert every non-blank line of *input_path* and write it to *output_path*.

    Each input line is split on tabs and its third column is handed to
    ``parse``; the resulting record is written followed by a newline. Lines
    that are empty or whitespace-only are skipped.

    Args:
        input_path: Path of the tab-separated input file to read.
        output_path: Path of the TSV file to write.
    """
    with open(input_path, "rt") as source, open(output_path, "wt") as sink:
        for row in source:
            if row.strip():
                # The third tab-separated column is the one passed to parse().
                annotation = row.split("\t")[2]
                sink.write(parse(annotation) + "\n")


if __name__ == "__main__":
    # Command-line entry point: two positional paths, input first, then output.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=(
            "Convert the TOP (Facebook Dialog Corpus) dataset into TSV format, "
            "only the leaf slots are kept and the first intent in preorder."
        ),
    )
    cli.add_argument("input", type=str, help="Input in the format of the Facebook Dialog Corpus")
    cli.add_argument("output", type=str, help="Output in the TSV format")
    options = cli.parse_args()
    main(options.input, options.output)
