import os
import pandas as pd
from tqdm import tqdm
import pickle
import tiktoken

import openai
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)

# Read the OpenAI API key from the OPENAI_API_KEY environment variable.
# Keep the key in the environment or a secret management service, not in this file.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Tokenizer for gpt-3.5-turbo; truncate_text uses it to count the tokens of a transcript.
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
# Upper bound (in tokens) that truncate_text measures transcripts against.
# NOTE(review): presumably the context window of gpt-3.5-turbo — confirm.
MAX_TOKEN = 4096
# Number of tokens subtracted from MAX_TOKEN before comparing in truncate_text.
# NOTE(review): presumably the token length of the longest question prompt — confirm.
longest_prompt = 110

@retry(
    stop=stop_after_attempt(6),
    wait=wait_random_exponential(min=5, max=60),
)
def completion_with_backoff(**kwargs):
    """Forward ``kwargs`` to ``openai.ChatCompletion.create`` and return its response.

    The call is wrapped in tenacity's ``retry`` decorator, configured to stop
    after 6 attempts and to wait between attempts according to
    ``wait_random_exponential(min=5, max=60)``.
    """
    response = openai.ChatCompletion.create(**kwargs)
    return response

def truncate_text(text):
    """Shorten a transcript until its token count fits the budget.

    The budget is ``MAX_TOKEN - longest_prompt`` tokens, counted with the
    module-level ``encoding``. While the text is at or above the budget, the
    two lines around the middle of the transcript are removed (lines are
    separated by ``'\\n'``) and the remainder is measured again.

    This is implemented as a loop rather than by recursion so that a
    transcript that needs many removals cannot exceed Python's recursion
    limit.

    Args:
        text: The transcript as a single string, one turn per line.

    Returns:
        ``text`` unchanged if it already fits, otherwise the transcript with
        middle lines removed. An empty string is returned when every line
        had to be removed.
    """
    budget = MAX_TOKEN - longest_prompt
    if len(encoding.encode(text)) < budget:
        return text

    turns = text.split('\n')
    # Stop when no lines are left so a non-positive budget cannot loop forever.
    while turns:
        start = len(turns) // 2
        # If the middle line is a Counselor line, start the removed pair one
        # line earlier (presumably to drop the preceding turn together with
        # the Counselor's reply — confirm against the transcript format).
        if turns[start].startswith('___Counselor___'):
            start -= 1
        # Only a single remaining line can push the index below zero.
        start = max(start, 0)
        del turns[start:start + 2]

        text = '\n'.join(turns)
        if len(encoding.encode(text)) < budget:
            break

    return text

def retrieve_data():
    """Load the transcript table and return it with truncated texts.

    Reads ``data/extended_transcripts_allOutcome.tsv`` (tab-separated) under
    the current working directory, passes each row's ``text`` through
    ``truncate_text`` and copies ``id``, ``positive``, ``informative`` and
    ``prepared`` unchanged.

    Returns:
        A DataFrame with the columns ``id``, ``text``, ``positive``,
        ``informative`` and ``prepared``.
    """
    transcripts_path = os.getcwd()+'/data/extended_transcripts_allOutcome.tsv'
    transcripts = pd.read_csv(transcripts_path, sep='\t')

    copied_columns = ('id', 'positive', 'informative', 'prepared')
    columns = {'id': [], 'text': [], 'positive': [], 'informative': [], 'prepared': []}
    for _, record in transcripts.iterrows():
        # Truncate first, then copy the remaining fields of the row.
        shortened = truncate_text(record['text'])
        for name in copied_columns:
            columns[name].append(record[name])
        columns['text'].append(shortened)

    return pd.DataFrame(data=columns)

def prompt_gpt(text_list, id_list, questions):
    """Ask every question about every conversation and collect the answers.

    For each (id, conversation) pair and each entry of ``questions`` a chat
    request is sent through ``completion_with_backoff`` using the
    ``gpt-3.5-turbo`` model. The user message is the conversation followed
    by a blank line and the question.

    Args:
        text_list: Conversation texts, paired positionally with ``id_list``.
        id_list: Identifiers of the conversations.
        questions: Mapping from feature name to the question text.

    Returns:
        On success, a dict with the parallel lists ``'id'``, ``'feature'``
        and ``'answer'`` (one entry per conversation/question pair).
        On failure, ``False``; the answers collected so far are first
        written to ``data/new_chatgpt_answers_partial.tsv`` under the
        current working directory.
    """
    import sys  # local import: only needed to report a failure

    # The system prompt is the same for every request, so build it once.
    system_prompt = "You are a helpful assistant to help me understand the chat conversation between HelpSeeker and Counselor. Briefly answer questions about the conversation.\n\n"

    results = {'id':[], 'feature':[], 'answer':[]}
    try:
        for _id, _conv in tqdm(zip(id_list, text_list)):
            for key, _question in questions.items():
                message_list = [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": _conv + "\n\n" + _question}
                ]

                response = completion_with_backoff(model="gpt-3.5-turbo", messages=message_list)
                answer = response['choices'][0]['message']['content']

                results['answer'].append(answer)
                results['feature'].append(key)
                results['id'].append(_id)
    except (Exception, KeyboardInterrupt) as err:
        # Best effort: keep what was collected so far, including when the
        # run is interrupted with Ctrl-C. Other BaseExceptions such as
        # SystemExit are deliberately not intercepted.
        print(f"prompt_gpt stopped early: {err!r}", file=sys.stderr)
        df = pd.DataFrame(data=results)
        df.to_csv(os.getcwd()+'/data/new_chatgpt_answers_partial.tsv', sep='\t', index=False)
        return False

    return results

# Questions asked about every transcript. The key is the feature name that
# prompt_gpt records in the 'feature' column; the value is the question text
# that is appended to the transcript in the user message.
questions = {
    "Identity": "Who is the HelpSeeker? Don't answer in sentences and answer by only choosing one from the given categories: {Maltreated child, Peer/friend of the abused, Family member of the abused, Other known adult, Unknown person, Other}",
    "Perpetrator": "Who is the perpetrator? Don't answer in sentences and answer by only choosing one from the given categories: {Parents, Siblings, Step-parents, Ex-partners, Relatives, Unknown person, Peer/friend, Other}",
    "Type": "What is the type of the abuse or the stress? Don't answer in sentences and answer by only choosing from the given categories: {Physical abuse, Verbal/Emotional abuse, Neglect/Careless behaviors, Stress from family, Stress from friends, Stress from school/work}",
    "Severity": "What is the nature and severity of the abuse or the stress? Don't answer in sentences and answer by only choosing from the given categories: {Imminent danger, Persistent abuse, Poor care, Casual behavior}",
    "Needs": "Why does the HelpSeeker come talk to the Counselor? Don't answer in sentences and answer by only choosing from the given categories: {Seeking resources, Getting emotional support, Getting advice about the situation, Reporting the situation, Not clear}",
    "Offers": "How does the Counselor help the HelpSeeker? Don't answer in sentences and answer by only choosing from the given categories: {Providing resources, Reflection of feelings, Affirmation or reassurance, Providing advice about the situation, Not clear}",
    "Strategy": "How does the Counselor explore the issue? Don't answer in sentences and answer by only choosing from the given categories: {Paraphrasing/Interpreting, Reflecting feelings, Asking questions, Validating/Reassuring, Providing information/advice}",
    "Tried": "What are the things that have previously done by the HelpSeeker to resolve the situation? Don't answer in sentences and answer by only choosing from the given categories: {None, Contacting authorities (CPS, police, etc.), Talking to professionals (counselors, therapists, etc.), Talking to others (family, friends, etc.), Self care methods, Others}",
    "Solution": "What are the things suggested by the Counselor to resolve the situation? Don't answer in sentences and answer by only choosing from the given categories: {None, Contacting authorities (CPS, police, etc.), Talking to professionals (counselors, therapists, etc.), Talking to others (family, friends, etc.), Self care methods, Others}",
    "Reaction": "What is the HelpSeeker's reaction to the Counselor's suggestion? Don't answer in sentences and answer by only choosing from the given categories: {Accepting, Accepting with concern, Doubting, Has already been tried, Denying}",
    "HelpSeeker": "Are there any indications that the HelpSeeker didn't like the chat? Consider if they are being hopeless, doubtful, denial, dissatisfied, etc. Only answer by 'yes' if you can find the indication, and 'no' otherwise",
    "Counselor": "Are there any indications that the Counselor hurt the HelpSeeker's feelings? Don't answer in sentences and answer by only choosing from the given categories: {Trivializing issues, Lacking reflection of feelings, Pushing certain advice, Lacking exploration, Lacking concrete solutions, None}"
}

# Load the transcripts (retrieve_data truncates each text) and ask every
# question about each of them.
data_df = retrieve_data()
# Uncomment to try the pipeline on the first 20 transcripts only:
# data_df = data_df[:20] # for test
result_dict = prompt_gpt(data_df['text'].tolist(), data_df['id'].tolist(), questions)

# prompt_gpt returns False after a failure (it has then already written the
# partial answers), so the complete output file is only written on success.
if result_dict:
    df = pd.DataFrame(data=result_dict)
    df.to_csv(os.getcwd()+'/data/new_chatgpt_answers.tsv', sep='\t', index=False)