import argparse
import os
import ipdb
from tqdm import tqdm
import string

def extract_tagged_span(extraction, tag):
    """Return the stripped text between ``<tag>`` and ``</tag>``.

    Args:
        extraction: The tagged extraction string to search.
        tag: Tag name without angle brackets, e.g. ``'arg1'`` or ``'rel'``.

    Returns:
        The text between the first opening tag and the first closing tag,
        with surrounding whitespace removed, or ``""`` when either tag is
        absent from ``extraction``.
    """
    open_tag = f'<{tag}>'
    close_tag = f'</{tag}>'
    try:
        start = extraction.index(open_tag) + len(open_tag)
        end = extraction.index(close_tag)
    except ValueError:
        # str.index raises ValueError when the tag is not present.
        return ""
    return extraction[start:end].strip()


def reformat_line(raw_line):
    """Convert one tab-separated input line into its cleaned output line.

    The line is split on tabs into ``text``, a tagged extraction and a
    confidence value (fields 0, 1 and 2). The extraction is rebuilt so
    that every tag is separated from its content by single spaces.

    Args:
        raw_line: One line read from the input file.

    Returns:
        The output line (terminated by a newline), or ``None`` when the
        extraction has no non-empty ``<rel>`` span and the line must be
        skipped.

    Raises:
        Exception: If the line has fewer than three tab-separated fields,
            i.e. no confidence column.
    """
    fields = raw_line.strip().split('\t')
    text = fields[0].strip()
    try:
        confidence = fields[2].strip()
    except IndexError as err:
        raise Exception('Unable to find confidence in line: ', fields) from err
    extraction = fields[1].strip()

    rel = extract_tagged_span(extraction, 'rel')
    if not rel:
        # Extractions without a relation are dropped from the output.
        return None
    arg1 = extract_tagged_span(extraction, 'arg1')
    arg2 = extract_tagged_span(extraction, 'arg2')

    ext = ""
    if arg1:
        ext += '<arg1> ' + arg1 + ' </arg1> '
    # The trailing space after </rel> is kept even when no arg2 follows.
    ext += '<rel> ' + rel + ' </rel> '
    if arg2:
        ext += '<arg2> ' + arg2 + ' </arg2>'
    # The confidence column is passed through unchanged.
    return text + '\t' + ext + '\t' + confidence + '\n'


if __name__ == '__main__':
    parser = argparse.ArgumentParser('clean input file')
    parser.add_argument('--fp', type=str, required=True, help='allennlp file')
    parser.add_argument('--out', type=str,
                        help='output file (default: <fp>.processed)')
    args = parser.parse_args()

    # Honour an explicit --out; otherwise write next to the input file.
    if not args.out:
        args.out = f"{args.fp}.processed"

    # Context managers guarantee both files are closed (and the output
    # flushed) even if a malformed line raises part-way through.
    with open(args.fp, 'r') as fin, open(args.out, 'w') as fout:
        for line in fin:
            cleaned = reformat_line(line)
            if cleaned is not None:
                fout.write(cleaned)