from milie.carb.oieReader import OieReader
from milie.carb.extraction import Extraction
from _collections import defaultdict
import ipdb

class GoldReader(OieReader):
    """Reader for gold-standard Open IE extractions.

    After ``read`` or ``read_data`` has run, ``self.oie`` is a ``defaultdict``
    mapping a sentence to the list of ``Extraction`` objects annotated for it.
    Every gold extraction is given a confidence of 1.0.
    """

    # Path relative to repo root folder
    default_filename = './oie_corpus/all.oie'

    def __init__(self):
        self.name = 'Gold'

    def read(self, fn):
        """Load gold extractions from a tab-separated file into ``self.oie``.

        Each non-blank line is ``sentence<TAB>relation[<TAB>arg ...]``.
        Arguments containing the marker ``"C: "`` are not added to the
        extraction. Extractions are grouped under the stripped sentence text,
        and each one records the 0-based index of the line it came from.

        Args:
            fn: Path of the file to read. If the path contains ``'spanish'``
                the file is decoded as Latin-1; otherwise the platform default
                encoding is used.

        Raises:
            ValueError: If a non-blank line has fewer than two tab-separated
                fields.
        """
        d = defaultdict(list)
        # encoding=None makes open() fall back to the platform default,
        # which is what a bare open(fn) does.
        encoding = 'latin-1' if 'spanish' in fn else None

        # The context manager guarantees the handle is closed, even if a
        # malformed line raises part-way through the file.
        with open(fn, 'r', encoding=encoding) as fin:
            for line_ind, line in enumerate(fin):
                stripped = line.strip()
                if not stripped:
                    # Blank lines (e.g. a trailing newline at end of file)
                    # carry no extraction; skip them instead of crashing.
                    continue

                data = stripped.split('\t')
                if len(data) < 2:
                    raise ValueError(
                        '{}: line {}: expected at least 2 tab-separated '
                        'fields (sentence, relation), got {}'.format(
                            fn, line_ind + 1, len(data)))

                text = data[0].strip()
                rel = data[1].strip()

                curExtraction = Extraction(pred=rel,
                                           head_pred_index=None,
                                           sent=text,
                                           confidence=1.0,
                                           index=line_ind)
                for arg in data[2:]:
                    # NOTE(review): "C: " presumably marks a context
                    # annotation rather than a real argument -- confirm
                    # against the corpus format.
                    if "C: " in arg:
                        continue
                    curExtraction.addArg(arg.strip())

                d[text].append(curExtraction)
        self.oie = d

    def read_data(self, data):
        """Build ``self.oie`` from already-parsed, in-memory examples.

        Args:
            data: Iterable of mappings, each with a ``'sentence'`` string and
                a ``'tuples'`` iterable. Every tuple is a mapping with the
                keys ``'relation'``, ``'arg0'`` and ``'args'`` (an iterable
                of further argument strings). Empty argument strings are
                ignored.

        A relation that is exactly ``"<be>"`` (ignoring surrounding
        whitespace) becomes ``"[is]"``; otherwise every ``"<be> "`` substring
        is removed from the relation. Each extraction records the 0-based
        position of its example in ``data``.
        """
        d = defaultdict(list)
        for ind, ex in enumerate(data):
            sentence = ex['sentence']
            for t in ex['tuples']:
                if t["relation"].strip() == "<be>":
                    rel = "[is]"
                else:
                    rel = t["relation"].replace("<be> ", "")

                curExtraction = Extraction(pred=rel,
                                           head_pred_index=None,
                                           sent=sentence.strip(),
                                           confidence=1.0,
                                           index=ind)
                if t["arg0"] != "":
                    curExtraction.addArg(t["arg0"])
                for arg in t['args']:
                    if arg != "":
                        curExtraction.addArg(arg)

                # NOTE(review): the key is the raw sentence, while the
                # Extraction stores the stripped one (and read() keys on the
                # stripped text). Kept as-is so existing lookups keep working.
                d[sentence].append(curExtraction)
        self.oie = d

if __name__ == '__main__' :
    # Smoke test: load the gold corpus and print the bag of words of the
    # first extraction of the first sentence, then the value of count().
    g = GoldReader()
    # read() takes only the file name; it has no includeNominal parameter.
    g.read('../oie_corpus/all.oie')
    d = g.oie
    # dict views are not subscriptable on Python 3, so take the first
    # (sentence, extractions) pair through an iterator.
    e = next(iter(d.items()))
    print(e[1][0].bow())
    print(g.count())
