""" Usage:
    tabReader --in=INPUT_FILE

Read a tab-formatted file.
Each line consists of:
sent, prob, pred, arg1, arg2, ...

"""

from milie.carb.oie_readers.oieReader import OieReader
from milie.carb.oie_readers.extraction import Extraction
from docopt import docopt
import logging
import json

# Configure the root logger at DEBUG level.  This runs at import time, so it
# affects every program that imports this module (basicConfig is a no-op only
# when the root logger already has handlers).
# NOTE(review): library modules normally leave logging configuration to the
# application -- confirm this import-time side effect is intended.
logging.basicConfig(level = logging.DEBUG)


class DataReader(OieReader):
    """Build extractions from an in-memory collection of sentences and triples."""

    def __init__(self):
        self.name = 'DataReader'

    def read(self, data):
        """
        Convert ``data`` into Extraction objects grouped by sentence.

        ``data`` is iterated to obtain the sentences and indexed by each
        sentence to obtain that sentence's triples.  Every triple is laid
        out as:
        subj, pred, obj, [extra args ...], confidence

        The last element is parsed as the confidence.  The subject is always
        added as an argument; the object and the extra arguments are added
        only when they are not the empty string.

        The result is stored in ``self.oie`` as a dict mapping each sentence
        to the list of its extractions, in input order.  Nothing is returned.

        Raises:
            ValueError: if the last element of a triple is a string that
                cannot be parsed as a float.
            IndexError: if a triple holds fewer than three elements
                (assuming triples are lists or tuples).
        """
        extractions_by_sent = {}
        ex_index = 0  # running index over all extractions, across sentences
        for sent in data:
            for trp in data[sent]:
                conf = float(trp[-1])
                subj, rel, obj = trp[0], trp[1], trp[2]
                extraction = Extraction(pred=rel,
                                        head_pred_index=None,
                                        sent=sent,
                                        confidence=conf,
                                        question_dist="./question_distributions/dist_wh_sbj_obj1.json",
                                        index=ex_index)
                ex_index += 1

                extraction.addArg(subj)
                if obj != '':
                    extraction.addArg(obj)
                # Everything between the object and the trailing confidence
                # is an extra argument; the slice is empty for triples with
                # four or fewer elements.
                for arg in trp[3:-1]:
                    if arg != '':
                        extraction.addArg(arg)

                # Append in place rather than rebuilding the list each time.
                extractions_by_sent.setdefault(sent, []).append(extraction)
        self.oie = extractions_by_sent


class JsonReader(OieReader):
    """Build extractions from a JSON file of sentences and triples."""

    def __init__(self):
        self.name = 'JsonReader'

    def read(self, fn):
        """
        Load the JSON file ``fn`` and group its extractions by sentence.

        The file is opened as UTF-8.  The decoded JSON value is iterated to
        obtain the sentences and indexed by each sentence to obtain that
        sentence's triples.  For every triple, elements 0, 1 and 2 are taken
        as subject, predicate and object; elements from index 3 up to, but
        not including, the final element are added as further arguments.
        The subject is always added; the object and further arguments are
        skipped when they are the empty string.

        Every extraction is given a fixed confidence of 1.0.

        The result is stored in ``self.oie`` as a dict mapping each sentence
        to the list of its extractions, in input order.  Nothing is returned.

        Raises:
            OSError: if ``fn`` cannot be opened.
            json.JSONDecodeError: if the file is not valid JSON.
            IndexError: if a triple holds fewer than three elements
                (assuming triples are JSON arrays).
        """
        with open(fn, encoding='utf8') as f:
            data = json.load(f)

        extractions_by_sent = {}
        ex_index = 0  # running index over all extractions, across sentences
        for sent in data:
            for trp in data[sent]:
                subj, rel, obj = trp[0], trp[1], trp[2]
                extraction = Extraction(pred=rel,
                                        head_pred_index=None,
                                        sent=sent,
                                        confidence=1.0,
                                        question_dist="./question_distributions/dist_wh_sbj_obj1.json",
                                        index=ex_index)
                ex_index += 1

                extraction.addArg(subj)
                if obj != '':
                    extraction.addArg(obj)
                # NOTE(review): the final element of the triple is never used
                # as an argument.  Presumably it is a confidence score that is
                # deliberately ignored in favour of 1.0 -- confirm against the
                # producer of the JSON file.  The slice is empty for triples
                # with four or fewer elements.
                for arg in trp[3:-1]:
                    if arg != '':
                        extraction.addArg(arg)

                # Append in place rather than rebuilding the list each time.
                extractions_by_sent.setdefault(sent, []).append(extraction)
        self.oie = extractions_by_sent

class TabReader(OieReader):
    """Build extractions from a tab-separated text file."""

    def __init__(self):
        self.name = 'TabReader'

    def read(self, fn):
        """
        Read the tab-separated file ``fn`` and group extractions by sentence.

        The file is opened as UTF-8.  Each line is stripped of surrounding
        whitespace and split on tabs; lines yielding fewer than three fields
        (including blank lines) are skipped.  The first three fields are:
        sent, prob, pred

        Fields from the fourth up to, but not including, the last one are
        added as arguments.

        The result is stored in ``self.oie`` as a dict mapping each sentence
        to the list of its extractions, in file order.  Nothing is returned.

        Raises:
            OSError: if ``fn`` cannot be opened.
            ValueError: if the ``prob`` field cannot be parsed as a float.
        """
        extractions_by_sent = {}
        ex_index = 0  # running index over all extractions in the file
        # Explicit encoding so non-ASCII sentences are read the same way on
        # every platform instead of depending on the locale default.
        with open(fn, encoding='utf8') as fin:
            for line in fin:
                fields = line.strip().split('\t')
                # A blank line splits into a single empty field, so this one
                # check also skips blank lines.
                if len(fields) < 3:
                    continue
                text, confidence, rel = fields[:3]
                extraction = Extraction(pred=rel,
                                        head_pred_index=None,
                                        sent=text,
                                        confidence=float(confidence),
                                        question_dist="./question_distributions/dist_wh_sbj_obj1.json",
                                        index=ex_index)
                ex_index += 1

                # NOTE(review): the last field of the line is not added as an
                # argument.  Presumably the files carry a trailing non-argument
                # column -- confirm against the producer of the input files.
                for arg in fields[3:-1]:
                    extraction.addArg(arg)

                # Append in place rather than rebuilding the list each time.
                extractions_by_sent.setdefault(text, []).append(extraction)
        self.oie = extractions_by_sent


if __name__ == "__main__":
    # Parse the command line against the module docstring, then load the
    # file given via --in with a TabReader.
    cli_options = docopt(__doc__)
    reader = TabReader()
    reader.read(cli_options["--in"])
