from json import JSONDecodeError
import os

from defiNNet.DefAnalyzer import ParsedDefinition
from stanfordNLP import StanfordNLP
from nltk.corpus import wordnet as wn


def parse_definitions_for_oov_words_in(input_path='data/oov_in_synset.txt', output_dir='data/oov_definitions'):
    """Parse the WordNet definitions of the words listed in ``input_path``.

    The first line of the input file is treated as a header and skipped.
    Every other non-blank line must hold exactly four tab-separated fields:
    the word, a WordNet synset name, a coarse part of speech (``'V'`` or
    ``'N'`` trigger a correction of the tagger output) and a trailing field
    that is ignored.

    For each row the synset definition is looked up in WordNet and passed to
    ``StanfordNLP.parse``; the word itself is tagged with ``StanfordNLP.pos``.
    Rows whose parse raises ``JSONDecodeError`` are dropped. The surviving
    records (word, synset name, parsed definition, upper-cased tag, ``#``)
    are written tab-separated to ``verbs_oov_definition_parsed.txt`` and
    ``nouns_oov_definition_parsed.txt`` inside ``output_dir``.

    :param input_path: path of the tab-separated input file.
    :param output_dir: directory receiving the two output files; created
        (including missing parents) when it does not exist.
    :raises ValueError: if a non-blank row does not have exactly four fields.
    """
    sNLP = StanfordNLP()

    nouns = []
    verbs = []

    verb_labels = ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']
    noun_labels = ['NN', 'NNS', 'NNP', 'NNPS', '-NOM', 'NP', 'NX']

    with open(input_path, 'r') as input_file:
        # The first line is a header.
        next(input_file, None)

        for line in input_file:
            # A stray blank line (e.g. at the end of the file) carries no record.
            if not line.strip():
                continue

            [word, synset_name, pos_original, _] = line.split('\t')
            definition = wn.synset(synset_name).definition()
            print(definition)
            # From here on ``word`` is the token returned by the tagger.
            word, pos = sNLP.pos(word)[0]
            print(word, pos)

            # Force the tag into the family announced by the input file when
            # the tagger disagrees with it.
            if pos_original == 'V' and pos not in verb_labels:
                pos = 'VB'
            elif pos_original == 'N' and pos not in noun_labels:
                pos = 'NN'
            print(word, pos)

            if definition is not None and definition != '':
                try:
                    definition = sNLP.parse(definition)
                except JSONDecodeError:
                    # The parser response could not be decoded: skip this row.
                    continue

            record = '\t'.join([word, synset_name, definition, pos.upper(), '#\n'])
            if pos in verb_labels:
                verbs.append(record)
            else:
                nouns.append(record)

    # makedirs also creates missing parents and tolerates an existing directory.
    os.makedirs(output_dir, exist_ok=True)

    with open(os.path.join(output_dir, 'verbs_oov_definition_parsed.txt'), 'w+') as file:
        file.writelines(verbs)
    with open(os.path.join(output_dir, 'nouns_oov_definition_parsed.txt'), 'w+') as file:
        file.writelines(nouns)


def parse_definitions_for_words_in(input_path='data/vocabulary_in_synset.txt', output_dir='data'):
    """Parse the WordNet definitions of the vocabulary words in ``input_path``.

    The first line of the input file is treated as a header and skipped.
    Every other non-blank line must hold exactly four tab-separated fields:
    the word, a WordNet synset name, a category (``'V'``, ``'N'`` or ``'A'``
    are recognised) and a trailing field that is ignored.

    The word is tagged with ``StanfordNLP.pos`` and a non-empty definition is
    passed to ``StanfordNLP.parse``; rows for which either call raises
    ``JSONDecodeError`` are dropped. When the tag does not start with the
    category it is replaced by ``VB``, ``NN`` or ``JJ`` respectively.

    Records (word, synset name, parsed definition, upper-cased tag, ``#``)
    are written tab-separated to ``verbs_definition_parsed.txt`` (category
    ``'V'``), ``nouns_definition_parsed.txt`` (category ``'N'``) and
    ``adj_definition_parsed.txt`` (everything else) inside ``output_dir``.

    :param input_path: path of the tab-separated input file.
    :param output_dir: directory receiving the three output files; created
        (including missing parents) when it does not exist.
    :raises ValueError: if a non-blank row does not have exactly four fields.
    """
    sNLP = StanfordNLP()

    # Tag forced on a word whose tagger output contradicts its category.
    fallback_tags = {'V': 'VB', 'N': 'NN', 'A': 'JJ'}

    nouns = []
    verbs = []
    adj = []

    with open(input_path, 'r') as input_file:
        # The first line is a header.
        next(input_file, None)

        for line in input_file:
            # A stray blank line (e.g. at the end of the file) carries no record.
            if not line.strip():
                continue

            [word, synset_name, category, _] = line.split('\t')
            definition = wn.synset(synset_name).definition()
            print(definition)

            try:
                # Tag the word unconditionally so ``pos`` always belongs to
                # the current row, even when the definition is empty.
                _, pos = sNLP.pos(word)[0]
                if definition:
                    definition = sNLP.parse(definition)
            except JSONDecodeError:
                # The server response could not be decoded: skip this row.
                continue

            if not pos.startswith(category):
                pos = fallback_tags.get(category, pos)

            record = '\t'.join([word, synset_name, definition, pos.upper(), '#\n'])
            # Route on the category: ``pos`` is a Penn tag such as 'NN' and
            # never equals the bare letter 'N'.
            if category == 'V':
                verbs.append(record)
            elif category == 'N':
                nouns.append(record)
            else:
                adj.append(record)

    # makedirs also creates missing parents and tolerates an existing directory.
    os.makedirs(output_dir, exist_ok=True)

    with open(os.path.join(output_dir, 'adj_definition_parsed.txt'), 'w+') as file:
        file.writelines(adj)
    with open(os.path.join(output_dir, 'verbs_definition_parsed.txt'), 'w+') as file:
        file.writelines(verbs)
    with open(os.path.join(output_dir, 'nouns_definition_parsed.txt'), 'w+') as file:
        file.writelines(nouns)


class DefinitionController:
    """Index the parsed definitions of a file by the rule that opens them.

    A *rule* is the triple (sentence label, first-constituent label,
    ``'+'``-joined labels of the children) obtained from
    ``ParsedDefinition.first_phrase()``. The index is built lazily, on the
    first call that needs it, and kept for the lifetime of the instance.
    """

    def __init__(self, path, pos, tagset=None):
        """Remember the input file and the part of speech it describes.

        :param path: tab-separated file of parsed definitions.
        :param pos: string used as prefix / sub-directory for output files.
        :param tagset: stored on the instance; not read by this class.
        """
        self.path = path
        self.pos = pos
        self.tagset = tagset
        # Nested dicts sentence label -> first constituent -> children -> count.
        self._rules = None
        # Same nesting, but the leaves are lists of ParsedDefinition objects.
        self._definitions_given_rule = None

    def find_rules(self):
        """Return the rule counts, reading ``self.path`` only the first time."""
        if self._rules is None or self._definitions_given_rule is None:
            self._rules, self._definitions_given_rule = self._find_rules_in_definitions()
        return self._rules

    def _find_rules_in_definitions(self):
        """Read ``self.path`` and group its definitions by rule.

        Only the first row of each (lower-cased) word is considered, blank
        lines are ignored and definitions whose ``first_phrase()`` is ``None``
        are skipped.

        :return: ``(rules, definitions_given_rule)`` as described in
            ``__init__``.
        """
        seen_words = set()
        rules = {}
        definitions_given_rule = {}

        with open(self.path, 'r') as input_file:
            for raw_line in input_file:
                if not raw_line.strip():
                    continue
                fields = raw_line.split('\t')

                word = fields[0].lower()
                if word in seen_words:
                    continue
                seen_words.add(word)

                # NOTE(review): fields 1, -2 and 2 are presumably the synset
                # name, the POS tag and the parsed definition, matching the
                # records written by the parse_* functions of this module.
                parsed_definition = ParsedDefinition(word, fields[1], fields[-2], fields[2])
                first_phrase = parsed_definition.first_phrase()
                if first_phrase is None:
                    continue

                sentence, first_constituent, children = first_phrase

                sentence_label = sentence.label()
                first_constituent_label = first_constituent.label()
                children_label = '+'.join(child.label() for child in children)

                counts = rules.setdefault(sentence_label, {}).setdefault(first_constituent_label, {})
                grouped = definitions_given_rule.setdefault(sentence_label, {}).setdefault(
                    first_constituent_label, {})

                counts[children_label] = counts.get(children_label, 0) + 1
                grouped.setdefault(children_label, []).append(parsed_definition)

        return rules, definitions_given_rule

    def _iter_rule_counts(self):
        """Yield ``(sentence_tag, first_constituent, children, count)`` per rule."""
        for sentence_tag, by_constituent in self._rules.items():
            for first_constituent, by_children in by_constituent.items():
                for children, count in by_children.items():
                    yield sentence_tag, first_constituent, children, count

    def count_definition_using_rule(self, output_dir):
        """Write every rule with its count to ``<output_dir>/<pos>_rules_found.txt``.

        Rows are tab-separated, end with ``#`` and are sorted by decreasing
        count. ``output_dir`` is created when missing.
        """
        self.find_rules()

        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, self.pos + '_rules_found.txt')

        # sorted() is stable, so rules with equal counts keep discovery order.
        rows = sorted(self._iter_rule_counts(), key=lambda row: row[3], reverse=True)
        with open(output_path, 'w+') as output:
            output.writelines('\t'.join([str(x) for x in row] + ['#\n']) for row in rows)

    def save_definitions_divided_by_rule(self, output_dir, limit=100):
        """Export, one file per rule, the rules backed by more than ``limit`` definitions."""
        self.find_rules()

        for sentence_tag, first_constituent, children, _ in self._iter_rule_counts():
            if len(self._definitions_given_rule[sentence_tag][first_constituent][children]) > limit:
                self.definitions_using_rule(sentence_tag, first_constituent, children, output_dir=output_dir)

    def definitions_using_rule(self, sentence_tag, first_constituent, children, output_dir=None):
        """Return the definitions that follow a rule, optionally saving them.

        When ``output_dir`` is given, the word, synset name and definition of
        each match are written to
        ``<sentence_tag>_<first_constituent>_<children>_definitions.txt`` in
        that directory (created when missing).

        :raises KeyError: if the rule is unknown.
        """
        self.find_rules()
        matching = self._definitions_given_rule[sentence_tag][first_constituent][children]

        if output_dir is not None:
            lines = ['\t'.join([parsed_definition.word(), parsed_definition.synset().name(),
                                parsed_definition.definition(), '#\n'])
                     for parsed_definition in matching]

            os.makedirs(output_dir, exist_ok=True)

            # Drop characters that are unsafe in file names on common systems.
            filename = children
            for c in ['<', '>', ':', '"', '.', '/', '\\', '|', '?', '*', ' ']:
                filename = filename.replace(c, '')

            output_path = os.path.join(output_dir,
                                       sentence_tag + '_' + first_constituent + '_' + filename + '_definitions.txt')
            with open(output_path, 'w+') as output:
                output.writelines(lines)

        return matching

    def compute_example_of_rule(self, sentence_tag, first_constituent, children, output_dir):
        """Write the summarised first phrase of each definition following a rule.

        One row is written per definition whose ``first_phrase_summarized()``
        is not ``None``, after a header row, to
        ``<sentence_tag>_<first_constituent>_<children>_examples.txt`` in
        ``output_dir`` (created when missing).

        :raises KeyError: if the rule is unknown.
        """
        self.find_rules()

        os.makedirs(output_dir, exist_ok=True)
        lines = []
        for definition in self._definitions_given_rule[sentence_tag][first_constituent][children]:
            summarization = definition.first_phrase_summarized()
            if summarization is None:
                continue
            sentence_label, first_child_label, labels, heads = summarization

            lines.append('\t'.join([definition.synset().name(), definition.word(), '\t'.join(heads),
                                    definition.pos(), '\t'.join(labels), definition.definition(), '#\n']))

        # NOTE(review): unlike definitions_using_rule, ``children`` is used in
        # the file name as-is, without stripping special characters.
        output_path = os.path.join(output_dir,
                                   sentence_tag + '_' + first_constituent + '_' + children + '_examples.txt')

        with open(output_path, 'w+') as output:
            # The header names two heads / labels (w1, w2) per row.
            header = '\t'.join(['Synset_name', 'target', 'w1', 'w2',
                                'target_pos', 'w1_pos', 'w2_pos', 'definition', '#\n'])

            output.writelines([header] + lines)

    def most_used_rules(self, limit):
        """Return the rules as ``(sentence_tag, first_constituent, children)`` tuples.

        The result is sorted by decreasing count; only the first ``limit``
        rules are returned unless ``limit`` is ``None``.
        """
        self.find_rules()

        ranked = sorted(self._iter_rule_counts(), key=lambda row: row[3], reverse=True)
        if limit is not None:
            ranked = ranked[0:limit]
        return [(sentence_tag, first_constituent, children)
                for sentence_tag, first_constituent, children, _ in ranked]


def describe_constituents_in_parsed_definitions_from(controller, output_dir, limit=100):
    """Export the rule counts and the per-rule definition files of ``controller``.

    The counts go to ``output_dir``; the per-rule files go to the
    ``<pos>_definitions`` sub-directory, with ``limit`` forwarded to
    ``save_definitions_divided_by_rule``.
    """
    controller.count_definition_using_rule(output_dir)

    per_rule_dir = os.path.join(output_dir, controller.pos + '_definitions')
    controller.save_definitions_divided_by_rule(per_rule_dir, limit=limit)


def build_examples_from(controller, output_dir, limit=15):
    """Write an examples file for each of the ``limit`` most used rules.

    ``output_dir`` is created when missing, including any missing parent
    directory; the files themselves are produced by
    ``controller.compute_example_of_rule`` inside the ``<output_dir>/<pos>``
    sub-directory.

    :param controller: object exposing ``pos``, ``most_used_rules`` and
        ``compute_example_of_rule``.
    :param output_dir: root directory of the examples.
    :param limit: number of rules to export, forwarded to ``most_used_rules``.
    """
    # makedirs also creates missing parents and tolerates an existing directory.
    os.makedirs(output_dir, exist_ok=True)

    examples_dir = os.path.join(output_dir, controller.pos)
    for (sentence_tag, first_constituent, children) in controller.most_used_rules(limit=limit):
        controller.compute_example_of_rule(sentence_tag, first_constituent, children, examples_dir)


def get_oov_test_with_sister_terms():
    """Run the rule analysis on the parsed OOV definitions of verbs and nouns.

    For each input file a DefinitionController is built, its rules and
    per-rule definitions are exported under ``data/oov_rules`` and examples
    of its 10 most used rules under ``data/oov_rules/examples``.
    """
    # Constituency and POS labels handed to every controller.
    penn_tags = [
        'ADJP', '-ADV', 'ADVP', '-BNF', 'CC', 'CD', '-CLF', '-CLR', 'CONJP', '-DIR',
        'DT', '-DTV', 'EX', '-EXT', 'FRAG', 'FW', '-HLN', 'IN', 'INTJ', 'JJ',
        'JJR', 'JJS', '-LGS', '-LOC', 'LS', 'LST', 'MD', '-MNR', 'NAC', 'NN',
        'NNS', 'NNP', 'NNPS', '-NOM', 'NP', 'NX', 'PDT', 'POS', 'PP', '-PRD',
        'PRN', 'PRP', '-PRP', 'PRP$', 'PRP-S', 'PRT', '-PUT', 'QP', 'RB', 'RBR',
        'RBS', 'RP', 'RRC', 'S', 'SBAR', 'SBARQ', '-SBJ', 'SINV', 'SQ', 'SYM',
        '-TMP', 'TO', '-TPC', '-TTL', 'UCP', 'UH', 'VB', 'VBD', 'VBG', 'VBN',
        'VBP', 'VBZ', '-VOC', 'VP', 'WDT', 'WHADJP', 'WHADVP', 'WHNP', 'WHPP', 'WP',
        'WP$', 'WP-S', 'WRB', 'X', 'AFX', '#', '$', '-LRB-', '\"', '(',
        ')', ',', '.', ':', '``',
    ]

    sources = (
        ('data/oov_definitions/verbs_oov_definition_parsed.txt', 'v'),
        ('data/oov_definitions/nouns_oov_definition_parsed.txt', 'n'),
    )

    for definitions_path, pos_tag in sources:
        oov_controller = DefinitionController(definitions_path, pos_tag, penn_tags)
        describe_constituents_in_parsed_definitions_from(oov_controller, output_dir='data/oov_rules', limit=10)
        build_examples_from(oov_controller, output_dir='data/oov_rules/examples', limit=10)


def get_examples():
    """Run the rule analysis on the parsed definitions of verbs, nouns and adjectives.

    For each input file a DefinitionController is built, its rules and the
    definitions of rules with more than 50 matches are exported under
    ``data/rules`` and examples of its 15 most used rules under
    ``data/rules/examples``.
    """
    # Constituency and POS labels handed to every controller.
    penn_tags = [
        'ADJP', '-ADV', 'ADVP', '-BNF', 'CC', 'CD', '-CLF', '-CLR', 'CONJP', '-DIR',
        'DT', '-DTV', 'EX', '-EXT', 'FRAG', 'FW', '-HLN', 'IN', 'INTJ', 'JJ',
        'JJR', 'JJS', '-LGS', '-LOC', 'LS', 'LST', 'MD', '-MNR', 'NAC', 'NN',
        'NNS', 'NNP', 'NNPS', '-NOM', 'NP', 'NX', 'PDT', 'POS', 'PP', '-PRD',
        'PRN', 'PRP', '-PRP', 'PRP$', 'PRP-S', 'PRT', '-PUT', 'QP', 'RB', 'RBR',
        'RBS', 'RP', 'RRC', 'S', 'SBAR', 'SBARQ', '-SBJ', 'SINV', 'SQ', 'SYM',
        '-TMP', 'TO', '-TPC', '-TTL', 'UCP', 'UH', 'VB', 'VBD', 'VBG', 'VBN',
        'VBP', 'VBZ', '-VOC', 'VP', 'WDT', 'WHADJP', 'WHADVP', 'WHNP', 'WHPP', 'WP',
        'WP$', 'WP-S', 'WRB', 'X',
    ]

    sources = (
        ('data/definitions/verbs_definition_parsed.txt', 'v'),
        ('data/definitions/nouns_definition_parsed.txt', 'n'),
        ('data/definitions/adj_definition_parsed.txt', 'a'),
    )

    for definitions_path, pos_tag in sources:
        rules_controller = DefinitionController(definitions_path, pos_tag, penn_tags)
        describe_constituents_in_parsed_definitions_from(rules_controller, output_dir='data/rules', limit=50)
        build_examples_from(rules_controller, output_dir='data/rules/examples', limit=15)
