#!/usr/bin/env python
import json
import urllib.request
from urllib.parse import unquote, quote

import requests
import stanza
from numpy import unicode


class NodeIdentifier:
    """
    Identify candidate nodes (linked entities and classes) in an English question.

    Entities come from the Falcon API, classes from the EARL API, and a leading
    question word ("who", "where", "Name", ...) contributes an extra class node.
    A stanza pipeline provides the lemma / POS information used to filter and
    classify candidates.
    """

    # Classes implied directly by the lemma of a question word.
    _QUESTION_WORD_URIS = {
        'when': 'http://dbpedia.org/ontology/Date',
        'where': 'http://dbpedia.org/ontology/Place',
        'who': 'http://dbpedia.org/ontology/Person',
        'whose': 'http://dbpedia.org/ontology/Person',
    }
    # Fallback class for "what" and for imperative openers.
    _OWL_THING = 'http://www.w3.org/2002/07/owl#Thing'
    # Imperative openers treated like question words (matched on surface text).
    _IMPERATIVES = ('Name', 'List')
    # Seconds to wait on any remote call; overridable per instance in __init__.
    request_timeout = 60

    def __init__(self, stop_words_path='/DATA_PATH/ImRL/data/dictionary/stop_words.txt',
                 stanza_dir='/DATA_PATH/stanza_resources', use_gpu=True, request_timeout=60):
        """
        Load the stop-word list and build the stanza English pipeline.

        :param stop_words_path: text file with one stop word per line
        :param stanza_dir: directory holding the stanza model resources
        :param use_gpu: forwarded to ``stanza.Pipeline``
        :param request_timeout: seconds to wait on EARL / Falcon calls (None waits forever)
        """
        # The context manager closes the file; no explicit close() is needed.
        with open(stop_words_path) as f:
            self.stop_word = [line.replace('\n', '') for line in f]
        self.analyse = stanza.Pipeline('en', dir=stanza_dir, use_gpu=use_gpu)
        self.request_timeout = request_timeout

    @staticmethod
    def _parse_earl_classes(response):
        """
        Extract class candidates from a decoded EARL response.

        A chunk is kept when EARL typed it as 'relation' and the local name of
        its top-ranked URI starts with an ASCII uppercase letter.

        :param response: decoded JSON with 'ertypes', 'chunktext' and 'rerankedlists'
        :return: list of dicts with 'chunk', 'surfacelength', 'surfacestart', 'uri'
        """
        classes = []
        for index, ertype in enumerate(response['ertypes']):
            if ertype != 'relation':
                continue
            # 'rerankedlists' is keyed by the chunk index rendered as a string.
            candidates = response['rerankedlists'][str(index)]
            if not candidates:
                continue
            # Element 1 of a candidate is its URI (element 0 is presumably a score).
            uri = candidates[0][1]
            # Test only the FIRST character: comparing the whole local name
            # against 'Z' wrongly rejects names such as "Zoo" ('Zoo' > 'Z').
            if not 'A' <= uri.split('/')[-1][:1] <= 'Z':
                continue
            span = response['chunktext'][index]
            classes.append({'chunk': span['chunk'],
                            'surfacelength': span['surfacelength'],
                            'surfacestart': span['surfacestart'],
                            'uri': uri})
        return classes

    def linking_EARL(self, question, topk=1):
        """
        Link class mentions of a question to DBpedia through the EARL API.

        Certificate verification is disabled for this one request only; the
        process-wide default HTTPS context is left untouched.

        :param question: natural-language question
        :param topk: accepted for interface compatibility; not used
        :return: list of class candidates (see ``_parse_earl_classes``)
        """
        import ssl

        # Unverified context scoped to this call. check_hostname must be
        # switched off before verify_mode may be set to CERT_NONE.
        context = ssl.create_default_context()
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE

        req = urllib.request.Request('https://earldemo.sda.tech/earl/api/processQuery')
        req.add_header('Content-Type', 'application/json')
        payload = json.dumps({'nlquery': question}).encode('utf-8')
        # The with-block closes the HTTP response once it has been read.
        with urllib.request.urlopen(req, payload, timeout=self.request_timeout,
                                    context=context) as reply:
            response = json.loads(reply.read())
        return self._parse_earl_classes(response)

    @staticmethod
    def _falcon_payload(text):
        """Return the UTF-8 encoded JSON request body Falcon expects for ``text``."""
        # json.dumps escapes quotes, backslashes and non-ASCII characters, which
        # hand-built string concatenation does not.
        return json.dumps({'text': text}).encode('utf-8')

    def _post_falcon(self, url, text):
        """POST ``text`` to the Falcon endpoint ``url`` and return the decoded JSON reply."""
        headers = {'Content-Type': 'application/json'}
        response = requests.post(url, headers=headers, data=self._falcon_payload(text),
                                 timeout=self.request_timeout)
        return json.loads(response.content)

    def linking_Falcon_short(self, entity_mention, topk=1):
        """
        Link a short entity mention with Falcon (short mode).

        :param entity_mention: surface form to link
        :param topk: number of candidates requested from the service
        :return: first element of the first returned entity, or None if there is none
        """
        res = self._post_falcon('https://labs.tib.eu/falcon/api?mode=short&k=' + str(topk),
                                entity_mention)
        entities = res['entities']
        return entities[0][0] if entities else None

    def linking_Falcon_long(self, question):
        """
        Link the entities of a full question with Falcon (long mode).

        :param question: natural-language question
        :return: list of the entries found under 'entities' in the reply
        """
        res = self._post_falcon('https://labs.tib.eu/falcon/api?mode=long', question)
        return list(res['entities'])

    @staticmethod
    def _overlaps_existing(noun, nodes):
        """Return True if ``noun`` contains, or is contained in, the chunk of any node."""
        if noun is None:
            return False
        return any(noun in node['chunk'] or node['chunk'] in noun for node in nodes)

    @classmethod
    def _question_word_uri(cls, words, index, nodes, allow_imperative):
        """
        Decide which class a question word at ``words[index]`` stands for.

        :param words: words of the sentence (objects with text / lemma / upos)
        :param index: 0-based position of the word to inspect
        :param nodes: nodes identified so far, used for the overlap checks
        :param allow_imperative: also accept "Name" / "List" as openers
        :return: class URI ('' for "which"), or None when no node should be added
        """
        word = words[index]
        following = words[index + 1] if index + 1 < len(words) else None
        if word.lemma == 'which':
            # "which <noun>": skip when the noun is already covered by a node.
            noun = following.text if following is not None else None
            return None if cls._overlaps_existing(noun, nodes) else ''
        if word.lemma in cls._QUESTION_WORD_URIS:
            return cls._QUESTION_WORD_URIS[word.lemma]
        if allow_imperative and word.text in cls._IMPERATIVES:
            # "Name the <noun>" / "List <noun>": step over a determiner.
            noun_index = index + 1
            if following is not None and following.upos == 'DET':
                noun_index = index + 2
            noun = words[noun_index].text if noun_index < len(words) else None
            return None if cls._overlaps_existing(noun, nodes) else cls._OWL_THING
        if word.lemma == 'what' and (following is None or following.upos != 'NOUN'):
            return cls._OWL_THING
        return None

    @classmethod
    def _question_word_nodes(cls, words, nodes):
        """
        Build class nodes for a question word in first position, or in second
        position after a leading adposition (e.g. "In which ...").

        Positions are taken from the word order rather than from ``word.id``,
        so the result does not depend on whether stanza reports ids as strings
        or as integers.

        :param words: words of the first sentence
        :param nodes: nodes identified so far (not modified)
        :return: list of new class nodes, possibly empty
        """
        found = []
        if words:
            uri = cls._question_word_uri(words, 0, nodes, True)
            if uri is not None:
                text = words[0].text
                found.append({'chunk': text, 'type': 'class', 'uri': uri,
                              'start_pos': 0,
                              'end_pos': len(text)})
        if len(words) > 1 and words[0].upos == 'ADP':
            uri = cls._question_word_uri(words, 1, nodes + found, False)
            if uri is not None:
                text = words[1].text
                # Offset = length of the first word's TEXT plus one separator.
                start = len(words[0].text) + 1
                found.append({'chunk': text, 'type': 'class', 'uri': uri,
                              'start_pos': start,
                              'end_pos': start + len(text)})
        return found

    def nodeIdentify(self, question):
        """
        Identify the possible nodes of a question.

        :param question: natural-language question
        :return: list of dicts with 'chunk', 'type' ('entity' or 'class'), 'uri',
                 'start_pos' and 'end_pos'
        """
        result = []
        sentences = self.analyse(question).sentences
        # Only the first sentence is analysed; tolerate an empty parse.
        words = sentences[0].words if sentences else []
        classes = self.linking_EARL(question)
        # Quotes are still sent to Falcon as %22 so the service receives the same
        # text as before; the returned surface forms are mapped back below.
        falcon_entities = self.linking_Falcon_long(question.replace("\"", "%22"))
        for entity in falcon_entities:
            surface = entity[1].replace("%22", "\"")
            start_pos = question.find(surface)
            if start_pos != -1:
                result.append({'chunk': surface, 'type': 'entity', 'uri': entity[0],
                               'start_pos': start_pos, 'end_pos': start_pos + len(surface)})
        for _class in classes:
            # Drop class candidates whose chunk is a verb of the sentence.
            if any(w.text == _class['chunk'] and w.upos == 'VERB' for w in words):
                continue
            result.append(
                {'chunk': _class['chunk'], 'type': 'class', 'uri': _class['uri'],
                 'start_pos': _class['surfacestart'],
                 'end_pos': _class['surfacestart'] + _class['surfacelength']})
        result.extend(self._question_word_nodes(words, result))
        return result


if __name__ == '__main__':
    # Demo: run node identification on one sample question and print the nodes found.
    question = ("Name the scientist whose supervisor was Ernest Rutherford "
                "and had a doctoral students named Charles Drummond Ellis?")
    nodeIdentifier = NodeIdentifier()
    identified_nodes = nodeIdentifier.nodeIdentify(question)
    print(identified_nodes)
