from path_manager import PathManager
import os
import zstandard
from datetime import datetime
import json
from PushshiftDumps.scripts.filter_file import read_lines_zst
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob


def filter_vader(obj):
    """Return True when VADER scores any field of *obj* as strongly negative.

    The ``"text"``, ``"context"`` and ``"answer"`` entries of *obj* are each
    scored with the module-level ``sid`` analyzer (created in the
    ``__main__`` block of this file), and the ``"compound"`` value of every
    score is compared against -0.95.

    Args:
        obj: Mapping that provides the keys ``"text"``, ``"context"`` and
            ``"answer"``.

    Returns:
        True if at least one compound score is below -0.95, otherwise False.

    Raises:
        KeyError: If one of the three keys is missing from *obj*.
    """
    # Score all three fields up front (a list, not a generator) so that a
    # missing key is always reported, regardless of the other scores.
    compound_scores = [
        sid.polarity_scores(obj[field])["compound"]
        for field in ("text", "context", "answer")
    ]
    # Previously the outcome of this comparison was discarded and the
    # function returned False unconditionally.
    return any(score < -0.95 for score in compound_scores)


def filter_text_blob(obj):
    """Decide whether *obj* passes the TextBlob sentiment filter.

    Records whose ``"level"`` is not 0 are rejected outright. For the rest,
    only the sentiment of ``obj["answer"]`` influences the result:

    * subjectivity below 0.8 -> accepted, whatever the polarity;
    * subjectivity above 0.8 -> accepted only when the polarity is
      below -0.5 or above 0.5;
    * subjectivity exactly 0.8 -> rejected.

    Args:
        obj: Mapping with a ``"level"`` entry and, when the level is 0, an
            ``"answer"`` entry holding the text to analyse.

    Returns:
        True if the record passes the filter, otherwise False.

    Raises:
        KeyError: If ``"level"`` (or ``"answer"`` for level-0 records) is
            missing.
    """
    if obj["level"] != 0:
        return False

    # Only the answer's sentiment ever affected the outcome; the "text" and
    # "context" analyses were computed but never used, so they are omitted.
    sentiment = TextBlob(obj["answer"]).sentiment
    subjectivity = sentiment.subjectivity
    polarity = sentiment.polarity

    if subjectivity < 0.8:
        return True
    if subjectivity > 0.8:
        return polarity < -0.5 or polarity > 0.5
    # Exactly 0.8 satisfies neither strict comparison above.
    return False


def vader(file_path):
    """Run the sentiment filter over every line of *file_path* and print stats.

    Each line yielded by ``read_lines_zst`` is parsed as JSON and passed to
    ``filter_text_blob``. Nothing is written out; the function only counts
    how many records the filter accepts and how many lines could not be
    processed, printing a progress line every 100,000 lines and a summary at
    the end. The summary reads the module-level ``subreddit`` name, which is
    set in the ``__main__`` block of this file.

    Args:
        file_path: Path of the file handed to ``read_lines_zst``.
    """
    file_size = os.stat(file_path).st_size
    file_lines = 0
    # Never updated in this function; kept so the progress line keeps its
    # established format.
    created = None
    bad_lines = 0
    lines_created = 0
    for line, file_bytes_processed in read_lines_zst(file_path):
        file_lines += 1
        if file_lines % 100000 == 0:
            print(
                f"{created} Line: {file_lines:,} Bad Lines: {bad_lines:,} Bytes Processed: {file_bytes_processed:,} : {(file_bytes_processed / file_size) * 100:.0f}%")
        try:
            obj = json.loads(line)
            # filter_vader(obj) is the alternative, VADER-based filter.
            accepted = filter_text_blob(obj)
        except (KeyError, json.JSONDecodeError) as err:
            # Concatenating "Error:" with the exception object raised a
            # TypeError inside this handler; format the exception instead
            # and count the line as bad.
            bad_lines += 1
            print(f"Error: {err!r}")
            continue
        if accepted:
            # Count the records the filter lets through.
            lines_created += 1
    print(f"Lines created: {lines_created} out of {file_lines} submissions of subreddit {subreddit}")
    print(f"Bad lines: {bad_lines}")


if __name__ == "__main__":
    # Locate the question/answer dump for the subreddit under analysis.
    qa_directory = PathManager.get_question_answers_path()
    path = os.path.join(qa_directory, "personalfinance_qa.zst")
    # Read by vader() when it prints its summary.
    subreddit = "personalfinance"

    # The VADER lexicon has to be downloaded before the analyzer is built;
    # `sid` is the module-level analyzer that filter_vader() uses.
    nltk.download("vader_lexicon")
    sid = SentimentIntensityAnalyzer()

    vader(path)
