import overall_utils

from agents.Agents import ZEROSHOT_FEEDBACK,ZEROSHOTMADJUDGE,ZEROSHOTMAD
from .templates.hotpotQA_prompts import *
from .templates.CSQA_prompts import *
from .evaluations import hotpotQA_eval
from .evaluations import CSQA_eval

import copy
import logging
from langchain.schema import (
    ChatMessage,
    ChatResult,
    AIMessage,
    HumanMessage,
    SystemMessage,
)
import openai



def parse_pred_answer(dataset: str, generated_response: str, use_json_format=False,json_key="Answer"):
    """Parse the predicted answer out of one or more raw model responses.

    Args:
        dataset: ``"hotpotQA"`` or ``"CSQA"``; selects the dataset-specific parser.
        generated_response: A single response string, or an iterable of
            response strings. A bare string is treated as a one-element list.
        use_json_format: Accepted for interface compatibility; not read here.
        json_key: Accepted for interface compatibility; not read here.

    Returns:
        A list holding element ``[1]`` of the dataset parser's result for each
        response, or ``None`` when ``dataset`` is neither supported value.
    """
    responses = [generated_response] if isinstance(generated_response, str) else generated_response

    if dataset == "hotpotQA":
        parsed = []
        for response in responses:
            parsed.append(hotpotQA_eval.parse_answer_noreact_hotpot(response)[1])
        return parsed

    if dataset == "CSQA":
        parsed = []
        for response in responses:
            parsed.append(CSQA_eval.parse_answer_noreact_CSQA(response)[1])
        return parsed

    # Unsupported dataset: nothing is parsed.
    return None

def check_correctness(dataset: str, generated_response: str, gold: str):
    """Grade each predicted answer against the gold answer.

    Args:
        dataset: Dataset name; only ``"hotpotQA"`` is graded.
        generated_response: Iterable of predicted answers; each element is
            graded separately.
        gold: Reference answer passed unchanged to the grader.

    Returns:
        A list with one ``hotpotQA_eval.grade_answer_hotpot`` result per
        element of ``generated_response``, or ``None`` for any other dataset.
    """
    if dataset != "hotpotQA":
        # No grader is wired up here for other datasets.
        return None

    grades = []
    for prediction in generated_response:
        grades.append(hotpotQA_eval.grade_answer_hotpot(prediction, gold))
    return grades


class QA_ZEROSHOTMAD(ZEROSHOTMAD):
    """Zero-shot multi-agent debate runner for the hotpotQA and CSQA datasets."""

    def __init__(self, models_tokenizers,question, answer,example,config) -> None:
        super().__init__(models_tokenizers,question, answer,example,config)

    def run(self):
        """Run one debate: a batched opening round followed by exchange rounds.

        The opening round is produced by a single temporary agent sampled with
        ``n = num_agents``; its responses are handed out to the players one
        each. Every later round (``num_rounds - 1`` of them) optionally
        summarizes each player's last message, then shows every player a
        message built from the other players and asks for a new completion.
        Parsed answers are accumulated in ``self.all_solutions``.

        Returns:
            Always ``False``.

        Raises:
            ValueError: If ``self.prompt_strategy`` is in neither MAD template
                table, or ``configs["dataset"]`` is not ``"hotpotQA"``/``"CSQA"``.
        """
        question = self.question
        summarize = self.configs["summarize"]
        partial_context = self.configs["partial_context"]
        use_context = self.configs["use_context"]
        strictly_last_round = self.configs["strictly_last_round"]
        context = self.example["supporting_paragraphs"] if use_context else ""
        self._init_players()

        # Pick the opening and summarization templates for the configured strategy.
        if self.prompt_strategy in MAD_PROMPT_TEMPLATES:
            MAD_START = MAD_PROMPT_TEMPLATES[self.prompt_strategy][0]
            MAD_SUMMARIZE = MAD_PROMPT_TEMPLATES["MAD_SUMMARIZE"]
            initial_prompt = MAD_START.format(context= context, question= question, scratchpad="")
        elif self.prompt_strategy in CSQA_MAD_PROMPT_TEMPLATES:
            MAD_START = CSQA_MAD_PROMPT_TEMPLATES[self.prompt_strategy][0]
            MAD_SUMMARIZE = CSQA_MAD_PROMPT_TEMPLATES["MAD_SUMMARIZE"]
            initial_prompt = MAD_START.format(question= question)
        else:
            # Previously this surfaced later as an UnboundLocalError.
            raise ValueError(f"Unknown MAD prompt strategy: {self.prompt_strategy!r}")

        dataset = self.configs["dataset"]
        if dataset not in ("hotpotQA", "CSQA"):
            raise ValueError(f"Unsupported dataset for MAD: {dataset!r}")

        for agent in self.players:
            agent.add_user_message(initial_prompt)
        current_round_players = self.players
        all_summarize_players = []

        # First round: one temporary agent samples num_agents completions at once.
        temp_config = copy.deepcopy(self.player_config)
        temp_config["n"] = self.num_agents
        temp_model = overall_utils._load_model(temp_config)
        temp_agent = self._create(temp_config,temp_model,self.player_model_name,"1st_round_temp_agent")
        if dataset == "hotpotQA":
            first_round_prompt = MAD_START.format(context= context, question= question, scratchpad="")
        else:  # "CSQA"
            first_round_prompt = MAD_START.format(question= question)
        temp_agent.add_user_message(first_round_prompt)
        first_round_responses = temp_agent.get_completion()
        for agent, response in zip(self.players, first_round_responses):
            agent.add_assistant_message(response)
        # Book all first-round token usage on the first player, just for the record.
        self.players[0].prompt_token_used = temp_agent.prompt_token_used
        self.players[0].completion_token_used = temp_agent.completion_token_used
        parsed_answers = parse_pred_answer(dataset, first_round_responses)
        self.all_solutions.extend(parsed_answers)

        # Value forwarded as the ``round`` keyword of the message builders;
        # -1 when not restricted to the strictly-last round.
        for round_idx in range(1, self.num_rounds):
            if summarize:
                summarize_players = [
                    self._create_debate_player(f"summarize_player{idx}_round{round_idx}")
                    for idx in range(self.num_agents)
                ]
                for idx in range(self.num_agents):
                    response = current_round_players[idx].conversations[-1]["content"]
                    summarize_players[idx].add_user_message(MAD_SUMMARIZE.format(question=question,answer=response))
                    summary = summarize_players[idx].get_completion()
                    summarize_players[idx].add_assistant_message(summary)
                    # Replace the player's last message with its summary.
                    current_round_players[idx].conversations.pop()
                    current_round_players[idx].add_assistant_message(summary)
                all_summarize_players.extend(summarize_players)

            round_num = 2*round_idx-1 if strictly_last_round else -1
            for i in range(self.num_agents):
                player = current_round_players[i]
                other_agents = current_round_players[:i] + current_round_players[i+1:]
                if dataset == "hotpotQA":
                    message = player.construct_message_from_other_players_hotpotqa(other_agents,question,context,"",round=round_num)
                else:  # "CSQA"
                    message = player.construct_message_from_other_players_CSQA(other_agents,question,round=round_num)
                player.add_user_message(message["content"])

                completion = player.get_completion(partial_context=partial_context,use_long_context=False)
                player.add_assistant_message(completion)
                parsed_answer = parse_pred_answer(dataset, completion)
                # NOTE(review): the first round *extends* all_solutions with a flat
                # list while later rounds *append* a list per player — confirm
                # that downstream consumers expect this mixed shape.
                self.all_solutions.append(parsed_answer)

        self.players.extend(all_summarize_players)
        self._log()

        return False




class QA_ZEROSHOTMADJUDGE(ZEROSHOTMADJUDGE):
    """Two-debater-plus-judge debate runner.

    ``self.players[0]`` and ``self.players[1]`` receive the debater system
    prompt and ``self.players[2]`` receives the judge system prompt.
    """

    def __init__(self, models_tokenizers,question, answer,example,config) -> None:
        super().__init__(models_tokenizers,question, answer,example,config)

    def run(self):
        """Run the judged debate and record the final solution.

        Flow: a base solution is generated, the two debaters respond in turn,
        and the judge rules after each round. The debate stops as soon as the
        latest entry appended to ``self.all_solutions`` is non-empty. If it is
        still empty after all rounds, an extra "ultimate judge" is asked to
        decide from the debaters' first-round messages.

        Returns:
            Always ``False``.
        """
        question = self.question
        self.init_players()

        # Whether the judge prompts use the JSON or the plain-text variant.
        self.json_format = self.configs["use_json"]
        if self.json_format == True:
            judge_user_template = MAD_JUDGE_JUDGE_USER
            ultimate_judge_user_template2 = MAD_JUDGE_JUDGE_USER_ADDITIONAL2
        else:
            judge_user_template = MAD_JUDGE_JUDGE_USER_NOJSON
            ultimate_judge_user_template2 = MAD_JUDGE_JUDGE_USER_NOJSON_ADDITIONAL2

        # Create the initial solution that seeds the debate.
        initial_prompt = MAD_JUDGE_START2.format(question= question)
        first_solution_player = self._create_player("first_solution_player")
        first_solution_player.add_system_message(initial_prompt)
        base_solution = first_solution_player.get_completion()
        first_solution_player.add_assistant_message(base_solution)
        self.base_solution = base_solution
        parsed_answer = parse_pred_answer(self.configs["dataset"], base_solution)
        self.all_solutions.append(parsed_answer)

        # First round.
        self.players[0].add_system_message(MAD_JUDGE_PLAYER_SYSTEM.format(question = question))
        self.players[1].add_system_message(MAD_JUDGE_PLAYER_SYSTEM.format(question = question))
        self.players[2].add_system_message(MAD_JUDGE_JUDGE_SYSTEM.format(question = question))

        self.players[0].add_user_message(MAD_JUDGE_FIRST_PLAYER1_USER.format(solution = self.base_solution))
        player1_solution = self.players[0].get_completion()
        self.players[0].add_assistant_message(player1_solution)

        self.players[1].add_user_message(MAD_JUDGE_FIRST_PLAYER2_USER.format(solution = player1_solution))
        player2_solution = self.players[1].get_completion()
        self.players[1].add_assistant_message(player2_solution)

        self.players[2].add_user_message(judge_user_template.format(round="first",
                                        affirmative_ans=player1_solution,
                                        negative_ans=player2_solution))
        judge_response = self.players[2].get_completion()
        self.players[2].add_assistant_message(judge_response)
        self._append_to_all_solutions(judge_response)
        last_judge_response = self.all_solutions[-1]

        # Debate rounds: stop as soon as the judge has produced an answer.
        for round_idx in range(self.num_rounds-1):
            if last_judge_response != '':
                break

            self.players[0].add_user_message(MAD_JUDGE_PLAYER_USER.format(solution = player2_solution))
            player1_solution = self.players[0].get_completion()
            self.players[0].add_assistant_message(player1_solution)

            self.players[1].add_user_message(MAD_JUDGE_PLAYER_USER.format(solution = player1_solution))
            player2_solution = self.players[1].get_completion()
            self.players[1].add_assistant_message(player2_solution)

            self.players[2].add_user_message(judge_user_template.format(round=self._round_dct(round_idx+2),
                                    affirmative_ans=player1_solution,
                                    negative_ans=player2_solution))
            judge_response = self.players[2].get_completion()
            self.players[2].add_assistant_message(judge_response)
            self._append_to_all_solutions(judge_response)
            # Refresh the verdict so the check at the top of the loop sees this
            # round's result; without this the loop could never stop early and
            # a later empty verdict could overwrite an earlier answer.
            last_judge_response = self.all_solutions[-1]

        # Additional technique: fall back to an extra judge when undecided.
        if last_judge_response != '':
            self.final_solution = last_judge_response
        else:
            ultimate_judge = self._create_judge("ultimate_judge")
            ultimate_judge.add_system_message(MAD_JUDGE_JUDGE_SYSTEM.format(question = question))

            # conversations: [0] system message, [1] user message, [2] first assistant message
            player1_solution = self.players[0].conversations[2]["content"]
            player2_solution = self.players[1].conversations[2]["content"]

            ultimate_judge.add_user_message(MAD_JUDGE_JUDGE_USER_ADDITIONAL1.format(affirmative_ans=player1_solution,
                                        negative_ans=player2_solution))
            ultimate_judge_response = ultimate_judge.get_completion()
            ultimate_judge.add_assistant_message(ultimate_judge_response)

            ultimate_judge.add_user_message(ultimate_judge_user_template2.format(question = question))
            ultimate_judge_response2 = ultimate_judge.get_completion()
            ultimate_judge.add_assistant_message(ultimate_judge_response2)
            self._append_to_all_solutions(ultimate_judge_response2)
            ultimate_ans = self.all_solutions[-1]
            # NOTE(review): final_solution is left untouched here when the
            # ultimate judge also yields an empty answer.
            if ultimate_ans != '':
                self.final_solution = ultimate_ans
            self.players.append(ultimate_judge)

        self._log()
        return False



# Module-level cache of the discovered reasoning plan. It is built by the first
# QA_ZEROSHOT_SELFDISCOVER.run() call in the process and reused by later calls.
reasoning_plan = ""


class QA_ZEROSHOT_SELFDISCOVER(ZEROSHOT_FEEDBACK):
    """Self-Discover runner: SELECT -> ADAPT -> IMPLEMENT once, then EXECUTE per question."""

    def __init__(self, models_tokenizers,question, answer,example,config) -> None:
        super().__init__(models_tokenizers,question, answer,example,config)

    def run(self):
        """Answer ``self.question`` using the cached (or freshly built) reasoning plan.

        When the module-level ``reasoning_plan`` is empty, three prompts are
        issued in sequence (SELECT, ADAPT, IMPLEMENT), each feeding the next,
        and the IMPLEMENT output is stored in ``reasoning_plan``. The plan is
        then inserted into the dataset-specific EXECUTE prompt. Every agent
        created is appended to ``self.players``.

        Returns:
            Always ``False``.

        Raises:
            ValueError: If ``configs["dataset"]`` is not ``"hotpotQA"`` or ``"CSQA"``.
        """
        global reasoning_plan
        self._init_models()
        self.answer_value = self.answer
        use_context = self.configs["use_context"] if "use_context" in self.configs else False
        context = self.example["supporting_paragraphs"] if use_context else ""

        if reasoning_plan == "":
            task_examples = SELF_DISCOVER_TASKEXAMPLES[self.configs["dataset"]]

            # SELECT: choose relevant modules from the full module list.
            agent = self._create_player("SELECT")
            agent.add_user_message(SELECT.format(reasoning_modules = "\n".join(reasoning_modules),task_examples=task_examples))
            selected_modules = agent.get_completion(temperature=0, n=1)
            agent.add_assistant_message(selected_modules)
            self.players.append(agent)

            # ADAPT: rephrase the selected modules for this task.
            agent = self._create_player("ADAPT")
            agent.add_user_message(ADAPT.format(reasoning_modules = selected_modules,task_examples=task_examples))
            adapted_modules = agent.get_completion(temperature=0, n=1)
            agent.add_assistant_message(adapted_modules)
            self.players.append(agent)

            # IMPLEMENT: turn the adapted modules into a reasoning plan. The
            # ADAPT output is passed on here; previously the raw module list
            # was passed again and the ADAPT result was never used.
            agent = self._create_player("IMPLEMENT")
            agent.add_user_message(IMPLEMENT.format(reasoning_modules = adapted_modules,task_examples=task_examples))
            implemented_plan = agent.get_completion(temperature=0, n=1)
            agent.add_assistant_message(implemented_plan)
            self.players.append(agent)
            reasoning_plan = implemented_plan

        question = self.question
        self.answer_type=None

        # EXECUTE: answer the question by following the reasoning plan.
        dataset = self.configs["dataset"]
        if dataset == "hotpotQA":
            question_prompt = hotpotQA_EXECUTE.format(reasoning_structure=reasoning_plan,question=question,context=context)
        elif dataset == "CSQA":
            question_prompt = CSQA_EXECUTE.format(reasoning_structure=reasoning_plan,question=question)
        else:
            # Previously this surfaced as an UnboundLocalError on question_prompt.
            raise ValueError(f"Unsupported dataset for Self-Discover: {dataset!r}")

        agent = self._create_player("thought_agent0")
        agent.add_user_message(question_prompt)
        cot_response = agent.get_completion()
        agent.add_assistant_message(cot_response)
        self.players.append(agent)

        self._log()
        return False



class QA_ZEROSHOT_FEEDBACK(ZEROSHOT_FEEDBACK):
    """Runner for single-agent and feedback-style QA strategies.

    ``self.strategy`` selects one of: ``"ZeroShot_Feedback_JID"``,
    ``"ZeroShotReflexion"``, ``"ZeroShot_CoT"``, ``"CoT_RecursiveCoT"``,
    ``"CoT_Already_Evaluate"`` or ``"Evaluate"``.
    """

    def __init__(self, models_tokenizers,question, answer,example,config) -> None:
        super().__init__(models_tokenizers,question, answer,example,config)

    def run(self):
        """Execute the configured strategy for ``self.question``.

        Prompt templates are looked up from ``self.prompt_strategy``; raw model
        outputs are appended to ``self.all_raw_solutions`` (and, for some
        strategies, parsed answers to ``self.all_solutions``). Every agent
        created is added to ``self.players``.

        Returns:
            Always ``False``.
        """
        question = self.question; answer = self.answer
        use_context = self.configs["use_context"]
        ReAct_format = self.configs["ReAct_format"]

        # Templates that a given prompt strategy does not provide stay None.
        COT_template = None
        REFLECT_template = None
        RECUR_template = None
        if self.prompt_strategy in COT_PROMPT_TEMPLATES:
            COT_template = COT_PROMPT_TEMPLATES[self.prompt_strategy]
        elif self.prompt_strategy in REFLEXION_PROMPT_TEMPLATES:
            COT_template = REFLEXION_PROMPT_TEMPLATES[self.prompt_strategy][0]
            REFLECT_template = REFLEXION_PROMPT_TEMPLATES[self.prompt_strategy][1]
        elif self.prompt_strategy in hotpotQA_RECURCOT_TEMPLATES:
            COT_template = hotpotQA_RECURCOT_TEMPLATES[self.prompt_strategy][0]
            RECUR_template = hotpotQA_RECURCOT_TEMPLATES[self.prompt_strategy][1]
        # CSQA
        elif self.prompt_strategy in CSQA_COT_PROMPT_TEMPLATES:
            COT_template = CSQA_COT_PROMPT_TEMPLATES[self.prompt_strategy]
        elif self.prompt_strategy in CSQA_REFLEXION_PROMPT_TEMPLATES:
            COT_template = CSQA_REFLEXION_PROMPT_TEMPLATES[self.prompt_strategy][0]
            REFLECT_template = CSQA_REFLEXION_PROMPT_TEMPLATES[self.prompt_strategy][1]
        elif self.prompt_strategy in CSQA_RECURCOT_TEMPLATES:
            COT_template = CSQA_RECURCOT_TEMPLATES[self.prompt_strategy][0]
            RECUR_template = CSQA_RECURCOT_TEMPLATES[self.prompt_strategy][1]

        if self.strategy == "ZeroShot_Feedback_JID":
            self.answer_value = parse_pred_answer(self.configs["dataset"], self.answer)
            self._init_players()
            for i in range(self.num_rounds):
                # Even-indexed players propose, odd-indexed players give feedback.
                proposer = self.players[i*2]
                proposer.add_user_message(COT_template.format(question = question))
                cot_response = proposer.get_completion()
                proposer.add_assistant_message(cot_response)
                parsed_answer = parse_pred_answer(self.configs["dataset"], cot_response)
                self.all_solutions.append(parsed_answer)
                self.all_raw_solutions.append(cot_response)

                critic = self.players[i*2+1]
                critic.add_user_message(JID_FEEDBACK.format(question = question,answer=cot_response))
                reflect_response = critic.get_completion()
                critic.add_assistant_message(reflect_response)
                parsed_answer = parse_pred_answer(self.configs["dataset"], reflect_response)
                self.all_solutions.append(parsed_answer)
                self.all_raw_solutions.append(reflect_response)

        elif self.strategy == "ZeroShotReflexion":
            self.answer_value = self.answer
            self._init_models()
            self.reflections = []
            last_trial = self.configs["last_trial"]
            few_shot = self.configs["few_shot"]
            cheat = self.configs["cheat"]
            self_value = self.configs["self_value"]
            players_by_round = [[] for _ in range(self.num_rounds)]
            context = self.example["supporting_paragraphs"] if use_context else ""

            def _cot_propose_answer(proposer_name, reflection_str,round_num):
                """Ask a fresh proposer for an answer; return (response, scratchpad)."""
                proposer = self._create_player(proposer_name)
                scratchpad = ""
                if ReAct_format:
                    # Thought
                    scratchpad = '\nThought: '
                    proposer.add_user_message(COT_template.format(reflections=reflection_str,context=context,question = question,scratchpad=scratchpad))
                    thought_responses = proposer.get_completion()
                    proposer.add_assistant_message(thought_responses)
                    players_by_round[round_num].append(proposer)

                    # Action: one single-sample agent per thought.
                    temp_config = copy.deepcopy(self.player_config)
                    temp_config["n"] = 1
                    temp_model = overall_utils._load_model(temp_config)
                    for idx,thought_response in enumerate(thought_responses):
                        scratchpad = '\nThought: ' + thought_response + '\nAction: '
                        action_proposer = self._create(temp_config,temp_model,self.player_model_name,f"round{round_num}_action_agent{idx}")
                        action_proposer.add_user_message(COT_template.format(reflections=reflection_str,context=context,question = question,scratchpad=scratchpad))
                        cot_response = action_proposer.get_completion()
                        action_proposer.add_assistant_message(cot_response)
                        players_by_round[round_num].append(action_proposer)
                        self.all_raw_solutions.append(cot_response)
                        scratchpad += cot_response[0]
                else:
                    if self.configs["dataset"] == "hotpotQA":
                        cot_prompt = COT_template.format(reflections=reflection_str,context=context,question = question,scratchpad="")
                    elif self.configs["dataset"] == "CSQA":
                        cot_prompt = COT_template.format(reflections=reflection_str, question = question)
                    proposer.add_user_message(cot_prompt)
                    cot_response = proposer.get_completion()
                    proposer.add_assistant_message(cot_response)
                    players_by_round[round_num].append(proposer)
                    self.all_raw_solutions.append(cot_response)
                    scratchpad = "\nAnswer: " + cot_response[0]
                return cot_response, scratchpad

            def _format_reflexion_reflect(question, prev_answer,last_trial):
                """Render all feedback gathered so far into the reflection text."""
                last_trial_str = ""
                bullet_list = 'Reflections:\n- ' + '\n- '.join([r.strip() for r in self.all_feedbacks])
                if last_trial:
                    last_trial_str = LAST_TRIAL_HEADER + f'Question: {question}\n' + prev_answer + '\n(END PREVIOUS TRIAL)\n'
                    reflect_str = REFLECTION_AFTER_LAST_TRIAL_HEADER + bullet_list
                else:
                    reflect_str = REFLECTION_HEADER + bullet_list

                return last_trial_str + reflect_str

            def _reflexion_reflect(reflector, question,scratchpad, fewhot=False,last_trial=False,cheat=False):
                """Collect a new reflection and return the formatted reflection text.

                ``fewhot`` and ``cheat`` are accepted but not read.
                """
                if self.configs["dataset"] == "hotpotQA":
                    reflect_prompt = REFLECT_template.format(context=context,question = question,scratchpad=scratchpad)
                elif self.configs["dataset"] == "CSQA":
                    reflect_prompt = REFLECT_template.format(question = question,scratchpad=scratchpad)
                reflector.add_user_message(reflect_prompt)
                reflect_response = reflector.get_completion()
                reflector.add_assistant_message(reflect_response)
                self.all_feedbacks.extend(reflect_response)
                return _format_reflexion_reflect(question, scratchpad,last_trial)

            # Get the initial solution.
            cot_response,scratchpad = _cot_propose_answer("round0_proposer", "",0)

            for i in range(self.num_rounds-1):
                if cheat:
                    # Stop early when the current answer already matches the gold answer.
                    # NOTE(review): check_correctness returns None for datasets other
                    # than hotpotQA, so indexing its result fails for CSQA here.
                    gold = parse_pred_answer(self.configs["dataset"], answer)
                    pred_answer = parse_pred_answer(self.configs["dataset"], cot_response)
                    if check_correctness(self.configs["dataset"], pred_answer, gold)[0] == 1:
                        break
                if self_value:
                    # Create the evaluator.
                    self.evaluate_config = self.configs["llms"]["llm_agent_evalaute"]
                    self.evaluate_model_name = self.evaluate_config["model_name"]
                    self.llm_agent_evalaute = overall_utils._load_model(self.evaluate_config)
                    evaluator = self._create(self.evaluate_config,self.llm_agent_evalaute,self.evaluate_model_name,f"evaluate{i}")

                    evaluator.add_user_message(CoT_Eval_Analysis["COT_EVAL_EFFECT2"].format(question = question,answer=cot_response[0]))
                    cot_value_response = evaluator.get_completion()
                    evaluator.add_assistant_message(cot_value_response)
                    players_by_round[i].append(evaluator)
                    self.all_values.append(cot_value_response)

                # Get new feedback.
                reflector = self._create_feedback(f"round{i+1}_reflector")
                reflect_response = _reflexion_reflect(reflector,question,scratchpad, fewhot=few_shot,last_trial=last_trial,cheat=cheat)
                players_by_round[i+1].append(reflector)

                # Get a new solution conditioned on the reflections.
                cot_response,scratchpad = _cot_propose_answer(f"round{i+1}_proposer", reflect_response,i+1)

            for round_players in players_by_round:
                self.players.extend(round_players)

        elif self.strategy == "ZeroShot_CoT":
            self.answer_value = self.answer
            self._init_models()
            assert self.num_rounds == self.player_config["n"]
            context = self.example["supporting_paragraphs"] if use_context else ""

            if ReAct_format == True:
                # hotpotQA only
                self.players.append(self._create_player("thought_agent0"))
                # Thought then answer: the model is prompted twice.
                # Thought
                scratchpad = '\nThought: '
                self.players[0].add_user_message(COT_template.format(context=context, question=question, scratchpad=scratchpad))
                thought_responses = self.players[0].get_completion()
                self.players[0].add_assistant_message(thought_responses)

                # Action
                temp_config = copy.deepcopy(self.player_config)
                temp_config["n"] = 1
                temp_model = overall_utils._load_model(temp_config)
                # Collected so the optional self-evaluation step below has
                # something to iterate on this path as well.
                cot_responses = []
                for idx,thought_response in enumerate(thought_responses):
                    scratchpad = '\nThought: ' + thought_response + '\nAction: '
                    self.players.append(self._create(temp_config,temp_model,self.player_model_name,f"action_agent{idx}"))
                    self.players[-1].add_user_message(COT_template.format(context=context, question=question, scratchpad=scratchpad))
                    cot_response = self.players[-1].get_completion()
                    self.players[-1].add_assistant_message(cot_response)
                    self.all_raw_solutions.append(cot_response)
                    # Completions are indexed like sequences elsewhere in this
                    # method (cot_response[0]), so flatten them here.
                    cot_responses.extend(cot_response)
            else:
                self.players.append(self._create_player("thought_agent0"))
                scratchpad = ""
                if self.configs["dataset"] == "hotpotQA":
                    question_prompt = COT_template.format(context=context, question=question, scratchpad=scratchpad)
                else:
                    question_prompt = COT_template.format(question=question)
                self.players[0].add_user_message(question_prompt)
                cot_responses = self.players[0].get_completion()
                self.players[0].add_assistant_message(cot_responses)
                self.all_raw_solutions.append(cot_responses)

            if "self_value" in self.configs:
                if self.configs["self_value"] == True:
                    # One evaluator per response, numbered by position
                    # (the index used here was previously never bound).
                    for eval_idx, cot_response in enumerate(cot_responses):
                        new_player = self._create_feedback(f"evaluate{eval_idx}")
                        new_player.add_user_message(COT_VALUE.format(question = question,answer=cot_response))
                        cot_value_response = new_player.get_completion()
                        new_player.add_assistant_message(cot_value_response)
                        self.players.append(new_player)
                        try:
                            # SECURITY(review): eval() runs on model-generated text.
                            # Consider a literal/JSON parser instead.
                            cot_value_response = eval(cot_value_response)
                            self.all_values.append(cot_value_response["Evaluation"])
                        except Exception:
                            # Best effort: keep the raw response when it cannot be parsed.
                            self.all_values.append(cot_value_response)

        elif self.strategy == "CoT_RecursiveCoT":
            # dataset: hotpotqa, CSQA
            # Generate CoT first, then evaluate whether each solution is correct.
            # num_agents is the number of evaluators.

            # Normal CoT to get the solutions.
            self.answer_value = self.answer
            self._init_models()
            assert self.num_rounds == self.player_config["n"]
            context = self.example["supporting_paragraphs"] if use_context else ""

            self.players.append(self._create_player("thought_agent0"))
            scratchpad = ""
            if self.configs["dataset"] == "hotpotQA":
                question_prompt = COT_template.format(context=context, question=question, scratchpad=scratchpad)
            else:
                question_prompt = COT_template.format(question=question)
            self.players[0].add_user_message(question_prompt)
            cot_responses = self.players[0].get_completion()
            self.players[0].add_assistant_message(cot_responses)
            parsed_answers = parse_pred_answer(self.configs["dataset"], cot_responses)
            self.all_solutions.append(parsed_answers)
            self.all_raw_solutions.append(cot_responses)

            # Evaluate the CoT solutions, one evaluator per solution.
            assert self.num_agents == self.feedback_config["n"]
            for i, candidate in enumerate(cot_responses):
                the_evaluator = self._create_feedback(f"evaluate{i}_0")
                # Only hotpotQA with context enabled puts the context in the prompt.
                if self.configs["dataset"] == "hotpotQA" and use_context:
                    eval_prompt = RECUR_template.format(context=context, question = question,answer=candidate)
                else:
                    eval_prompt = RECUR_template.format(question = question,answer=candidate)
                the_evaluator.add_user_message(eval_prompt)
                evaluate_response = the_evaluator.get_completion()
                the_evaluator.add_assistant_message(evaluate_response)
                self.players.append(the_evaluator)
                self.all_raw_solutions.append(evaluate_response)

        elif self.strategy == "CoT_Already_Evaluate":
            self.answer_value = self.example["answer"]
            # dataset: hotpotQA
            # CoT already generated; only evaluate whether it is correct.
            # num_agents is the number of evaluators.
            self._init_models()
            answers = self.example["answers"] # there are multiple answers
            context = self.example["supporting_paragraphs"] if use_context else ""
            assert self.num_agents == self.feedback_config["n"]
            for i, candidate in enumerate(answers):
                # Record the pre-generated answer as a player of its own.
                new_player = self._create_player(f"thought_agent{i}")
                new_player.add_assistant_message(candidate)
                self.players.append(new_player)

                the_evaluator = self._create_feedback(f"evaluate{i}_0")
                if self.configs["dataset"] == "hotpotQA" and use_context:
                    eval_prompt = RECUR_template.format(context=context, question = question,answer=candidate)
                else:
                    eval_prompt = RECUR_template.format(question = question,answer=candidate)
                the_evaluator.add_user_message(eval_prompt)
                evaluate_response = the_evaluator.get_completion()
                the_evaluator.add_assistant_message(evaluate_response)
                self.players.append(the_evaluator)
                self.all_raw_solutions.append(evaluate_response)

        elif self.strategy == "Evaluate":
            # dataset: gsm8k_eval
            # Given a question and an answer, evaluate whether it is correct.
            self._init_models()
            self.answer_value = self.example["label"]

            assert self.num_rounds == self.feedback_config["n"]
            new_player = self._create_feedback("evaluate0")
            if self.configs["add_system"] == True:
                new_player.add_system_message(CoT_Eval_Analysis_SYSTEM)
            new_player.add_user_message(COT_template.format(question = question,answer=answer))
            cot_value_response = new_player.get_completion()
            new_player.add_assistant_message(cot_value_response)
            self.players.append(new_player)
            self.all_raw_solutions.append(cot_value_response)

        self._log()
        return False
