import json
from glob import glob
import sys
import pandas as pd
import numpy as np
import nltk


# Load the annotated records. The context manager guarantees the file handle
# is closed once parsing finishes (the bare open() call leaked it), and the
# encoding is pinned to UTF-8 so parsing does not depend on the platform's
# default locale encoding.
with open("path_to_data", "r", encoding="utf-8") as data_file:
    dataset = json.load(data_file)

# Accumulator lists for the values extracted from the dataset. Every name is
# bound to its own, initially empty, list object.
quotes_list_csv, narratives_list_csv = [], []

# Quote-side span slots 1-5.
(
    span_q_1_list_csv,
    span_q_2_list_csv,
    span_q_3_list_csv,
    span_q_4_list_csv,
    span_q_5_list_csv,
) = ([] for _ in range(5))

# Narrative-side span slots 1-5.
(
    span_n_1_list_csv,
    span_n_2_list_csv,
    span_n_3_list_csv,
    span_n_4_list_csv,
    span_n_5_list_csv,
) = ([] for _ in range(5))

# Span counts, kept in their own list.
num_spans_list = list()



def _normalize(text):
    """Return *text* lower-cased with surrounding whitespace removed."""
    # A bare strip() already removes leading/trailing newlines, so a separate
    # strip("\n") pass beforehand is redundant.
    return text.strip().lower()


# Field names of the five span slots, paired by position with the list that
# collects that slot's values.
QUOTE_SPAN_KEYS = (
    "span_quote_1",
    "span_quote_2",
    "span_quote_3",
    "span_quote_4",
    "span_quote_5",
)
NARRATIVE_SPAN_KEYS = (
    "span_narrative_1",
    "span_narrative_2",
    "span_narrative_3",
    "span_narrative_4",
    "span_narrative_5",
)
quote_span_columns = (
    span_q_1_list_csv,
    span_q_2_list_csv,
    span_q_3_list_csv,
    span_q_4_list_csv,
    span_q_5_list_csv,
)
narrative_span_columns = (
    span_n_1_list_csv,
    span_n_2_list_csv,
    span_n_3_list_csv,
    span_n_4_list_csv,
    span_n_5_list_csv,
)

# Flatten every record into the column lists: one entry per record per list.
for d in dataset:
    fields = d["fields"]

    # Read all fields before appending anything, so a record with a missing
    # key raises KeyError without leaving the columns at unequal lengths.
    quote = _normalize(fields["quote"])
    narr = _normalize(fields["narrative"])
    quote_spans = [_normalize(fields[key]) for key in QUOTE_SPAN_KEYS]
    narrative_spans = [_normalize(fields[key]) for key in NARRATIVE_SPAN_KEYS]

    quotes_list_csv.append(quote)
    narratives_list_csv.append(narr)
    for column, span in zip(quote_span_columns, quote_spans):
        column.append(span)
    for column, span in zip(narrative_span_columns, narrative_spans):
        column.append(span)

    # Only the quote-side spans are counted; a slot counts when it is
    # non-empty after normalization.
    num_spans = sum(1 for span in quote_spans if span)
    num_spans_list.append(num_spans)

from nltk.sentiment.vader import SentimentIntensityAnalyzer

# NOTE(review): presumably the NLTK `vader_lexicon` resource must already be
# downloaded for this constructor to succeed -- confirm for the target machine.
sia = SentimentIntensityAnalyzer()

# quote text -> list of compound scores, one per narrative recorded for it.
sentiment_score_dict = {}
# quote text -> compound score of the quote text itself.
sentiment_score_dict_quote = {}
# quote text -> tallies; created empty here and left untouched by this section.
sentiment_num_positive = {}
sentiment_num_negative = {}
sentiment_num_neutral = {}

# Score every narrative and group the scores under the quote it belongs to.
# The two lists are walked in lockstep, pairing entry i with entry i.
for quote_text, narrative_text in zip(quotes_list_csv, narratives_list_csv):
    # strip() with no argument also removes newlines, so one call suffices.
    _narr = narrative_text.lower().strip()
    quote = quote_text.lower().strip()
    sentiment = sia.polarity_scores(_narr)["compound"]
    sentiment_score_dict.setdefault(quote, []).append(sentiment)

# Score each distinct quote exactly once; repeated quotes reuse the first
# result, so keys keep first-occurrence order.
for quote_text in quotes_list_csv:
    quote = quote_text.lower().strip()
    if quote not in sentiment_score_dict_quote:
        sentiment_score_dict_quote[quote] = sia.polarity_scores(quote)["compound"]


# Collapse each quote's list of narrative scores into a mean, and tally how
# many of those scores fall into each polarity band:
#   score >= 0.05 -> positive, score <= -0.05 -> negative, otherwise neutral.
# Only values of existing keys are reassigned, so iterating items() is safe.
for quote_key, scores in sentiment_score_dict.items():
    positive_total = sum(1 for score in scores if score >= 0.05)
    negative_total = sum(1 for score in scores if score <= -0.05)

    sentiment_num_positive[quote_key] = positive_total
    sentiment_num_negative[quote_key] = negative_total
    # Every score that is neither positive nor negative is neutral.
    sentiment_num_neutral[quote_key] = len(scores) - positive_total - negative_total

    # Replace the list of scores with its mean.
    sentiment_score_dict[quote_key] = np.array(scores).mean()


def _score_of(item):
    """Return the value part of a ``(key, value)`` dictionary item."""
    return item[1]


# Rebuild both dicts in sorted order: mean narrative scores from highest to
# lowest, quote scores from lowest to highest.
narrative_ranking = sorted(sentiment_score_dict.items(), key=_score_of, reverse=True)
sentiment_score_dict = dict(narrative_ranking)

quote_ranking = sorted(sentiment_score_dict_quote.items(), key=_score_of)
sentiment_score_dict_quote = dict(quote_ranking)

# Report header followed by one blank line.
print("\n Avg. sentiment scores for each quote : ")
print()

# One row per quote, in the iteration order of sentiment_score_dict_quote.
# Columns: quote text, the quote's own score, its sentiment_score_dict entry,
# then its positive / negative / neutral tallies, joined with " |&| ".
for quote_key, quote_score in sentiment_score_dict_quote.items():
    print(
        quote_key,
        quote_score,
        sentiment_score_dict[quote_key],
        sentiment_num_positive[quote_key],
        sentiment_num_negative[quote_key],
        sentiment_num_neutral[quote_key],
        sep=" |&| ",
    )