#!/usr/bin/python
# -*- coding:utf-8 -*-
import pickle
import string
import nltk
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn
import nltk.sentiment.util as util
import nltk.sentiment.sentiment_analyzer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer
from scipy.stats import norm


# Shared VADER analyzer used to score individual tokens.
sid = SentimentIntensityAnalyzer()

# Sample sentences printed by the demo under ``__main__``.
examples = ["I'm happy to know that you are depressed by him"]

# Tokens dropped before scoring: English stop words, every punctuation
# character, and three literal upper-case markers (presumably placeholder
# tokens in the input data -- TODO confirm).
stop = [
    *stopwords.words('english'),
    *string.punctuation,
    'FEMALE',
    'MALE',
    'NEUTRAL',
]

# Statistics of the emotional-word count; filled in on the first call to
# judge_emotional() from the pickled ``stats.data`` file.
mean, sigma = None, None


def penn_to_wn(tag):
    '''Translate a Penn Treebank POS tag into a WordNet POS constant.

    The tag is matched on its leading letter: ``J`` -> adjective,
    ``N`` -> noun, ``R`` -> adverb, ``V`` -> verb.  Returns ``None`` when
    the tag starts with none of these prefixes.
    '''
    prefix_table = (
        ('J', wn.ADJ),
        ('N', wn.NOUN),
        ('R', wn.ADV),
        ('V', wn.VERB),
    )
    # First matching prefix wins; fall back to None when nothing matches.
    return next(
        (wn_pos for prefix, wn_pos in prefix_table if tag.startswith(prefix)),
        None,
    )


def get_senti(word, tag):
    '''Get the SentiWordNet scores of a single POS-tagged word.

    Args:
        word: the token to score; it is lower-cased before lookup.
        tag: the Penn Treebank POS tag of ``word``.

    Returns:
        ``[pos_score, neg_score, obj_score]`` of the word's first WordNet
        sense, or ``[]`` when the word is not scored: its tag does not map
        to a noun, adjective or adverb, lemmatization fails, or WordNet has
        no synset for it.
    '''
    word = word.lower()
    wn_tag = penn_to_wn(tag)

    # Verbs and unmapped tags are deliberately not scored.
    if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
        return []

    # The module defines no shared lemmatizer object, so build one here
    # instead of referencing an undefined global name.
    lemmatizer = WordNetLemmatizer()
    try:
        lemma = lemmatizer.lemmatize(word, pos=wn_tag)
    except KeyError:
        return []
    if not lemma:
        return []

    # Look the word up by its lemma so inflected forms resolve to their
    # base entry.
    synsets = wn.synsets(lemma, pos=wn_tag)
    if not synsets:
        return []

    # Take the first sense, the most common
    synset = synsets[0]
    swn_synset = swn.senti_synset(synset.name())
    return [
        swn_synset.pos_score(),
        swn_synset.neg_score(),
        swn_synset.obj_score(),
    ]


def get_emotional_words(text):
    '''Return the strongly emotional tokens found in ``text``.

    The text is tokenized with ``nltk.word_tokenize``; tokens listed in the
    module-level ``stop`` collection are discarded (the comparison is
    case-sensitive).  Each remaining token is scored on its own with the
    shared VADER analyzer ``sid`` and kept when its ``pos`` or ``neg``
    score is greater than 0.5.

    Args:
        text: the raw string to analyse.

    Returns:
        A list of the selected tokens in their order of appearance; a token
        occurring several times in ``text`` is listed once per occurrence.
    '''
    # Build the lookup set once so each membership test is O(1).
    stop_set = set(stop)
    tokens = [w for w in nltk.word_tokenize(text) if w not in stop_set]

    # No POS tagging here: the scores come from VADER on the bare token,
    # so tags would be computed and thrown away.
    emotional_words = []
    for token in tokens:
        scores = sid.polarity_scores(token)
        for key in ('pos', 'neg'):
            if scores[key] > 0.5:
                emotional_words.append(token)
    return emotional_words


def judge_emotional(text):
    '''Score how emotional ``text`` is as a value between 0 and 1.

    The number of emotional words found by get_emotional_words() is
    standardized with the module-level ``mean`` and ``sigma`` and passed
    through the standard normal CDF.

    ``mean`` and ``sigma`` are loaded on first use from the pickle file
    ``stats.data`` located in the same directory as this module, then kept
    in the module globals for later calls.

    Args:
        text: the raw string to analyse.

    Returns:
        ``norm.cdf((count - mean) / sigma)`` where ``count`` is the number
        of emotional words in ``text``.

    Raises:
        OSError: if ``stats.data`` cannot be opened.
    '''
    import os

    global mean, sigma
    if mean is None or sigma is None:
        # Resolve the data file from this module's directory rather than by
        # substituting the module's file name, so the lookup does not depend
        # on what the module file is called.
        path = os.path.join(os.path.dirname(__file__), 'stats.data')
        # NOTE: pickle can execute arbitrary code while loading; stats.data
        # must come from a trusted source.
        with open(path, 'rb') as fin:
            mean, sigma = pickle.load(fin)
    keywords = get_emotional_words(text)
    val = (len(keywords) - mean) / sigma
    return norm.cdf(val)


if __name__ == '__main__':
    # Demo run: show where the script lives, then for every sample sentence
    # print the sentence, its emotional words and its emotional score.
    print(__file__)
    for sample in examples:
        print(sample)
        print(get_emotional_words(sample))
        print(judge_emotional(sample))
