# -*- coding: utf-8 -*-

from framework.common.logger import LOGGER

# Optional dependency: prefer an importable `pattern` package; otherwise retry
# with the copy expected under <entry-script dir>/third-party/pattern. When
# both attempts fail, EN is set to None so callers can detect the absence.
try:
    import pattern.en as EN
except ImportError:
    import os
    import sys
    main = sys.modules['__main__']
    # `__main__` has no usable `__file__` in an interactive session or under
    # `python -c`; skip the path tweak there rather than raising at import
    # time, so the module still degrades to EN = None.
    if getattr(main, '__file__', None) is not None:
        sys.path.append(os.path.join(os.path.dirname(main.__file__), 'third-party', 'pattern'))
    try:
        import pattern.en as EN
    except ImportError:
        LOGGER.warning('Can not import pattern')
        EN = None

# Spelled-out forms for the single digits '2' through '9'.
# '1' is intentionally not listed here: post_process_tokens handles it
# separately because it can also become the article 'a'.
SPECIAL_TOKENS = {
    '2': 'two',
    '3': 'three',
    '4': 'four',
    '5': 'five',
    '6': 'six',
    '7': 'seven',
    '8': 'eight',
    '9': 'nine',
}


def _is_digital(token):
    return token is not None and (token in ['%', '$'] or any(c.isdigit() for c in token))


def _inflect_factored_token(token):
    """Turn one '￨'-separated factored token into a surface word.

    The token must split into exactly nine fields. Of these, the lemma,
    part-of-speech tag, tense, number and progressive flag are used; the
    remaining ones are ignored.

    Raises:
        ValueError: if the token does not split into exactly nine fields.
    """
    # Ignored positional fields: sense, pers, pref
    lemma, _neg, pos_tag, _, tense, num, _, prog, _ = token.split('￨')
    # TODO: deal with neg property
    if pos_tag == 'n':
        if num == 'sg':
            return EN.singularize(lemma)
        return EN.pluralize(lemma)

    if pos_tag == 'v':
        if prog == '+':
            # 'part' is pattern's tense alias for the present participle.
            inflected = EN.conjugate(lemma, 'part')
        elif tense == 'past':
            inflected = EN.conjugate(lemma, tense=EN.PAST)
        elif tense == 'fut':
            inflected = EN.conjugate(lemma, tense=EN.FUTURE)
        elif tense == 'pres':
            inflected = EN.conjugate(lemma, tense=EN.PRESENT)
        else:
            return lemma
        # conjugate() is documented to return None when it has no matching
        # form (likely for FUTURE, as English verbs have no inflected
        # future); keep the lemma rather than emitting None as a token.
        return inflected or lemma

    return lemma  # no conjugate


def post_process_tokens(hypothesis_tokens):
    """Rewrite raw hypothesis tokens into their surface word forms.

    Three kinds of tokens are rewritten; every other token passes through
    unchanged:

    * '2'..'9' are spelled out via SPECIAL_TOKENS, unless the previous or
      next token is numeric according to _is_digital.
    * '1' becomes 'a' when the next token is 'year'; otherwise it becomes
      'one' under the same "no numeric neighbour" rule.
    * A token containing '￨' is treated as a factored token and inflected
      with pattern.en (nouns by number, verbs by progressive flag / tense).

    Args:
        hypothesis_tokens: sequence of token strings.

    Returns:
        A new list of tokens. When pattern.en is unavailable (EN is None),
        no rewriting at all is done and the input object itself is returned.

    Raises:
        ValueError: if a factored token does not have exactly nine fields.
    """
    if EN is None:
        return hypothesis_tokens

    tokens = []
    num_tokens = len(hypothesis_tokens)
    for index, token in enumerate(hypothesis_tokens):
        # Neighbours are None at the sequence boundaries; _is_digital
        # treats None as non-numeric.
        prev_token = hypothesis_tokens[index - 1] if index > 0 else None
        next_token = hypothesis_tokens[index + 1] if index + 1 < num_tokens else None
        isolated = not _is_digital(prev_token) and not _is_digital(next_token)
        if token in SPECIAL_TOKENS:
            if isolated:
                token = SPECIAL_TOKENS[token]
        elif token == '1':
            if next_token == 'year':
                token = 'a'
            elif isolated:
                token = 'one'
        elif '￨' in token:
            token = _inflect_factored_token(token)
        tokens.append(token)
    return tokens
