import sys
import os

import argparse
import json

from stanford_client import StanfordNLP

def getParser():
    """Build the command-line parser for the entity-linking script.

    Registers four string options, each with a default path under
    ``triples/reverb20k/``: the input triple file, the output file for the
    linked triples, the entity-to-id file, and the output file for the
    linked entities.

    Returns:
        argparse.ArgumentParser: parser whose help output shows defaults.
    """
    cli = argparse.ArgumentParser(
        description="parser for arguments",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # (short flag, long flag, help text, default path)
    option_table = (
        ("-t", "--triplefile", "input file containing triples",
         'triples/reverb20k/sentences.txt'),
        ("-o", "--tripleout", "output file for storing entities linked to triples",
         'triples/reverb20k/sentences_linked.txt'),
        ("-e", "--ent2id", "files containing entity to id",
         'triples/reverb20k/ent2id.txt'),
        ("-l", "--entout", "output file for storing linked entities",
         'triples/reverb20k/ent2id_linked.txt'),
    )
    for short_flag, long_flag, help_text, default_path in option_table:
        cli.add_argument(short_flag, long_flag, type=str, help=help_text, default=default_path)
    return cli

def getEntities(resp):
    """Collect every entity mention of the first sentence in a response.

    Args:
        resp: Parsed response dict. Only ``resp['sentences'][0]`` is read,
            specifically its ``'entitymentions'`` and ``'tokens'`` lists.

    Returns:
        dict: Maps each mention's surface text (the ``'word'`` of the tokens
        in ``[tokenBegin, tokenEnd)`` joined by single spaces) to
        ``{'entity': <entitylink or 'NA'>, 'ner': <ner or 'NA'>}``.
        Empty when the sentence has no entity mentions. If two mentions
        share the same surface text, the later one wins.

    Raises:
        KeyError, IndexError: If the response lacks the fields read above.
    """
    first_sentence = resp['sentences'][0]
    entities = first_sentence['entitymentions']
    tokens = first_sentence['tokens']
    ret_dict = {}
    for entity in entities:
        span = tokens[entity['tokenBegin']:entity['tokenEnd']]
        mention = " ".join(token['word'] for token in span)
        ret_dict[mention] = {
            'entity': entity.get('entitylink', 'NA'),
            'ner': entity.get('ner', 'NA'),
        }
    # Return only after the loop so that all mentions are kept, not just
    # the first one.
    return ret_dict

def link_entities(params):
    """Link each mention listed in the entity-to-id file and dump the result.

    Reads ``params.ent2id`` line by line. Blank lines and lines with fewer
    than two tab-separated fields are skipped. For every remaining line the
    first field is the mention and the second its integer id. The mention is
    submitted to the linker in four casings (as written, capitalized,
    title-cased, upper-cased); whenever the submitted text itself appears
    among the returned mentions, that result dict is tagged with the id
    under the ``'eid'`` key and kept.

    The mapping ``mention -> list of kept result dicts`` is written as JSON
    to ``params.entout``.

    Args:
        params: Object exposing ``ent2id`` (input path) and ``entout``
            (output path) attributes.

    Raises:
        ValueError: If the second field of a line is not an integer.
    """
    linker = StanfordNLP()
    linked_by_mention = {}
    with open(params.ent2id) as source:
        for raw_line in source:
            stripped = raw_line.strip()
            if not stripped:
                continue
            fields = stripped.split("\t")
            if len(fields) < 2:
                continue
            surface = fields[0].strip()
            entity_id = int(fields[1])
            casings = (surface, surface.capitalize(), surface.title(), surface.upper())
            hits = []
            for variant in casings:
                found = getEntities(linker.entity_link(variant))
                if variant not in found:
                    continue
                # The id is stored alongside the mention entries in the same dict.
                found['eid'] = entity_id
                hits.append(found)
            linked_by_mention[surface] = hits
    with open(params.entout, 'w') as sink:
        json.dump(linked_by_mention, sink)

def link_entities_from_triples(params):
    """Link entities in every triple of the triple file and dump the result.

    Reads ``params.triplefile`` line by line, skipping blank lines. Each
    line is split on ``'#'``; the sentence sent to the linker is the same
    line with every ``'#'`` replaced by a space. A result is kept only when
    the last ``'#'``-separated field appears among the returned mentions.

    Writes ``{'sentences': [...], 'entities': [...]}`` as JSON to
    ``params.tripleout``. ``'sentences'`` holds every processed sentence,
    ``'entities'`` only the kept results, so the two lists are not
    necessarily aligned.

    Args:
        params: Object exposing ``triplefile`` (input path) and
            ``tripleout`` (output path) attributes.
    """
    linker = StanfordNLP()
    seen_sentences = []
    kept_results = []
    with open(params.triplefile) as source:
        for raw_line in source:
            record = raw_line.strip()
            if not record:
                continue
            parts = record.split('#')
            text = record.replace('#', ' ')
            seen_sentences.append(text)
            found = getEntities(linker.entity_link(text))
            if parts[-1] in found:
                kept_results.append(found)
    payload = {'sentences': seen_sentences, 'entities': kept_results}
    with open(params.tripleout, 'w') as sink:
        json.dump(payload, sink)

def main():
    """Parse command-line arguments and run both linking passes.

    Runs ``link_entities_from_triples`` first, then ``link_entities``, with
    the same parsed arguments.

    Exits with status 1 when the arguments are invalid. A successful
    argparse exit such as ``--help`` keeps its own status of 0.
    """
    parser = getParser()
    try:
        params = parser.parse_args()
    except SystemExit as exc:
        # argparse reports both --help (code 0) and usage errors (non-zero)
        # through SystemExit. Only errors are remapped to status 1, and
        # unrelated exceptions such as KeyboardInterrupt are not caught.
        if exc.code in (0, None):
            raise
        sys.exit(1)
    link_entities_from_triples(params)
    link_entities(params)

# Run the linking passes only when this file is executed as a script.
if __name__ == "__main__":
    main()

