import overall_utils
from .templates.math_prompts_v1 import *
from .templates.theoremQA_prompts import *
from gsm8k.evaluations.eval_gsm import solve_math_problems,parse_answer
from gsm8k.evaluations.eval_MATH import last_boxed_only_string,remove_boxed,grade_answer
from agents.Agents import ZEROSHOT_FEEDBACK,ZEROSHOTMADJUDGE,ZEROSHOTMAD


import copy
import logging
from langchain.schema import (
    ChatMessage,
    ChatResult,
    AIMessage,
    HumanMessage,
    SystemMessage,
)


def parse_pred_answer(dataset: str, generated_response: str, use_json_format=False,json_key="Answer"):
    """Extract the final answer from one or more generated responses.

    Args:
        dataset: Dataset name. ``"counterintuitive_AR"`` responses are parsed
            with ``parse_answer``; every other dataset takes the content of
            the last boxed expression via ``last_boxed_only_string`` and
            ``remove_boxed``.
        generated_response: A single response string or a list of response
            strings (a lone string is wrapped into a one-element list).
        use_json_format: Only honoured when ``dataset == "gsm8k"``. Each
            response is then expected to be a JSON object or a Python dict
            literal, and the value stored under ``json_key`` is returned.
        json_key: Key to read from the parsed object when
            ``use_json_format`` is set.

    Returns:
        A list with one parsed answer per response. In JSON mode a response
        that cannot be parsed, is not a dict, or lacks ``json_key`` yields
        ``''``.
    """
    # Local imports keep this function self-contained.
    import ast
    import json

    if isinstance(generated_response, str):
        generated_response = [generated_response]

    if dataset == "gsm8k" and use_json_format:
        key = f"{json_key}"
        answers = []
        for response in generated_response:
            parsed = None
            # Model output is untrusted text: never eval() it. Strict JSON is
            # tried first, then a Python literal (single-quoted dicts).
            for loader in (json.loads, ast.literal_eval):
                try:
                    parsed = loader(response)
                    break
                except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                    continue
            if isinstance(parsed, dict):
                answers.append(parsed.get(key, ''))
            else:
                answers.append('')
        return answers

    if dataset == "counterintuitive_AR":
        return [parse_answer(response) for response in generated_response]

    # gsm8k (non-JSON), MATH and any other dataset share the boxed-answer path.
    return [remove_boxed(last_boxed_only_string(response)) for response in generated_response]



class GSM8K_ZEROSHOTMAD(ZEROSHOTMAD):
    """Zero-shot multi-agent debate runner.

    All agents receive the same opening prompt. The first-round answers are
    sampled in one call from a temporary agent configured with
    ``n = num_agents``; in every later round each agent is shown the other
    agents' messages and asked to answer again.
    """

    def __init__(self, models_tokenizers,question, answer,example,config) -> None:
        super().__init__(models_tokenizers,question, answer,example,config)

    def run(self):
        """Run the debate and record parsed and raw answers on ``self``.

        Reads ``summarize``, ``partial_context``, ``strictly_last_round``,
        ``dataset`` and (optionally) ``use_llama_template`` from
        ``self.configs``. Appends to ``self.all_solutions`` and
        ``self.all_raw_solutions``, extends ``self.players`` with any
        summarizer agents, then calls ``self._log()``.

        Returns:
            Always ``False``.
        """
        question = self.question
        summarize = self.configs["summarize"]
        partial_context = self.configs["partial_context"]
        strictly_last_round = self.configs["strictly_last_round"]
        use_llama_template = self.configs["use_llama_template"] if "use_llama_template" in self.configs else False
        self._init_players()

        # Choose the opening prompt: a TheoremQA template when the prompt
        # strategy names one, the generic MAD template otherwise.
        if self.prompt_strategy in THEOREMQA_PROMPT_TEMPLATES:
            start_template = THEOREMQA_PROMPT_TEMPLATES[self.prompt_strategy]
            if use_llama_template:
                start_template = THEOREMQA_PROMPT_TEMPLATES[self.prompt_strategy+"_LLAMA"]
        else:
            start_template = MATH_MAD_TEMPLATES["MAD_START"]
            if use_llama_template:
                start_template = MATH_MAD_TEMPLATES["MAD_START_LLAMA"]

        initial_prompt = start_template.format(question= question)
        for agent in self.players:
            agent.add_user_message(initial_prompt)
        players = self.players
        all_summarize_players = []

        # First round: one temporary agent samples num_agents completions in
        # a single request; the responses are then handed out to the players.
        temp_config = copy.deepcopy(self.player_config)
        temp_config["n"] = self.num_agents
        temp_model = overall_utils._load_model(temp_config)
        temp_agent = self._create(temp_config,temp_model,self.player_model_name,"1st_round_temp_agent")
        temp_agent.add_user_message(initial_prompt)
        first_round_responses = temp_agent.get_completion()
        for agent, response in zip(self.players, first_round_responses):
            agent.add_assistant_message(response)
        # Book all first-round token usage on the first player, for the record.
        self.players[0].prompt_token_used = temp_agent.prompt_token_used
        self.players[0].completion_token_used = temp_agent.completion_token_used
        parsed_answers = parse_pred_answer(self.configs["dataset"], first_round_responses)
        self.all_solutions.extend([parsed] for parsed in parsed_answers)
        self.all_raw_solutions.append(first_round_responses)

        for round_idx in range(1,self.num_rounds):
            if summarize:
                # Replace each agent's latest reply with a summary of it
                # before the other agents get to read it.
                summarize_players = [
                    self._create_debate_player(f"summarize_player{agent_idx}_round{round_idx}")
                    for agent_idx in range(self.num_agents)
                ]
                for idx, summarizer in enumerate(summarize_players):
                    response = players[idx].conversations[-1]["content"]
                    summarizer.add_user_message(MAD_SUMMARIZE.format(question=question,answer=response))
                    summary = summarizer.get_completion()
                    summarizer.add_assistant_message(summary)
                    players[idx].conversations.pop()
                    players[idx].add_assistant_message(summary)
                    self.all_raw_solutions.append(summary)
                all_summarize_players.extend(summarize_players)

            # With strictly_last_round, 2*round_idx-1 is passed as the
            # conversation position to read; otherwise -1 is passed.
            round_num = 2*round_idx-1 if strictly_last_round else -1
            for i in range(self.num_agents):
                other_agents = players[:i] + players[i+1:]
                if self.configs["dataset"] == "TheoremQA":
                    message = players[i].construct_message_from_other_players_theoremQA(other_agents,question,round=round_num)
                else:
                    message = players[i].construct_message_from_other_players(other_agents,question,round=round_num)
                players[i].add_user_message(message["content"])
                print(f"----round{round_idx}----:")
                completion = players[i].get_completion(partial_context=partial_context,use_long_context=False)
                players[i].add_assistant_message(completion)
                parsed_answer = parse_pred_answer(self.configs["dataset"], completion)
                self.all_solutions.append(parsed_answer)
                self.all_raw_solutions.append(completion)

        self.players.extend(all_summarize_players)
        self._log()

        return False









class GSM8K_ZEROSHOTMADJUDGE(ZEROSHOTMADJUDGE):
    """Two-debater plus judge debate runner.

    ``self.players[0]`` and ``self.players[1]`` are the debaters and
    ``self.players[2]`` is the judge. A separate player first produces a base
    solution, the debaters argue over it, and the judge is consulted after
    every round. If no verdict has been recorded once the rounds are used up,
    an extra "ultimate judge" is asked to decide.
    """

    def __init__(self, models_tokenizers,question, answer,example,config) -> None:
        super().__init__(models_tokenizers,question, answer,example,config)

    def run(self):
        """Run the judged debate and store the outcome on ``self``.

        Sets ``self.json_format`` and ``self.base_solution``, appends to
        ``self.all_solutions``, and sets ``self.final_solution`` when a
        non-empty verdict is available. Calls ``self._log()`` at the end.

        Returns:
            Always ``False``.
        """
        question = self.question
        self.init_players()

        # Pick JSON or plain-text judge prompts.
        self.json_format = self.configs["use_json"]
        if self.json_format == True:
            judge_user_template = MAD_JUDGE_JUDGE_USER
            ultimate_judge_user_template2 = MAD_JUDGE_JUDGE_USER_ADDITIONAL2
        else:
            judge_user_template = MAD_JUDGE_JUDGE_USER_NOJSON
            ultimate_judge_user_template2 = MAD_JUDGE_JUDGE_USER_NOJSON_ADDITIONAL2

        # Base solution that the first debater responds to.
        initial_prompt = MAD_JUDGE_START2.format(question= question)
        first_solution_player = self._create_player("first_solution_player")
        first_solution_player.add_system_message(initial_prompt)
        base_solution = first_solution_player.get_completion()
        first_solution_player.add_assistant_message(base_solution)
        self.base_solution = base_solution
        parsed_answer = parse_pred_answer(self.configs["dataset"], base_solution)
        self.all_solutions.append(parsed_answer)

        debater1, debater2, judge = self.players[0], self.players[1], self.players[2]

        # First round.
        debater1.add_system_message(MAD_JUDGE_PLAYER_SYSTEM.format(question = question))
        debater2.add_system_message(MAD_JUDGE_PLAYER_SYSTEM.format(question = question))
        judge.add_system_message(MAD_JUDGE_JUDGE_SYSTEM.format(question = question))

        debater1.add_user_message(MAD_JUDGE_FIRST_PLAYER1_USER.format(solution = self.base_solution))
        player1_solution = debater1.get_completion()
        debater1.add_assistant_message(player1_solution)

        debater2.add_user_message(MAD_JUDGE_FIRST_PLAYER2_USER.format(solution = player1_solution))
        player2_solution = debater2.get_completion()
        debater2.add_assistant_message(player2_solution)

        judge.add_user_message(judge_user_template.format(round="first",
                                        affirmative_ans=player1_solution,
                                        negative_ans=player2_solution))
        judge_response = judge.get_completion()
        judge.add_assistant_message(judge_response)
        self._append_to_all_solutions(judge_response)
        last_judge_response = self.all_solutions[-1]

        # Further debate rounds: stop as soon as a non-empty verdict exists.
        for debate_round in range(self.num_rounds-1):
            if last_judge_response != '':
                break

            debater1.add_user_message(MAD_JUDGE_PLAYER_USER.format(solution = player2_solution))
            player1_solution = debater1.get_completion()
            debater1.add_assistant_message(player1_solution)

            debater2.add_user_message(MAD_JUDGE_PLAYER_USER.format(solution = player1_solution))
            player2_solution = debater2.get_completion()
            debater2.add_assistant_message(player2_solution)

            judge.add_user_message(judge_user_template.format(round=self._round_dct(debate_round+2),
                                    affirmative_ans=player1_solution,
                                    negative_ans=player2_solution))
            judge_response = judge.get_completion()
            judge.add_assistant_message(judge_response)
            self._append_to_all_solutions(judge_response)
            # Refresh the verdict so the check at the top of the loop sees
            # this round's result; previously it only ever saw round one's.
            last_judge_response = self.all_solutions[-1]

        # Fallback: no verdict yet, so ask a fresh judge to decide.
        last_judge_response = self.all_solutions[-1]
        if last_judge_response != '':
            self.final_solution = last_judge_response
        else:
            ultimate_judge = self._create_judge("ultimate_judge")
            ultimate_judge.add_system_message(MAD_JUDGE_JUDGE_SYSTEM.format(question = question))

            # Index 2 is each debater's first reply
            # (0: system message, 1: user message, 2: assistant message).
            # NOTE(review): GSM8K_ZEROSHOTMAD reads conversation entries as
            # dicts (["content"]); confirm these entries expose `.content`.
            player1_solution = debater1.conversations[2].content
            player2_solution = debater2.conversations[2].content

            ultimate_judge.add_user_message(MAD_JUDGE_JUDGE_USER_ADDITIONAL1.format(affirmative_ans=player1_solution,
                                        negative_ans=player2_solution))
            ultimate_judge_response = ultimate_judge.get_completion()
            ultimate_judge.add_assistant_message(ultimate_judge_response)

            ultimate_judge.add_user_message(ultimate_judge_user_template2.format(question = question))
            ultimate_judge_response2 = ultimate_judge.get_completion()
            ultimate_judge.add_assistant_message(ultimate_judge_response2)
            self._append_to_all_solutions(ultimate_judge_response2)
            ultimate_ans = self.all_solutions[-1]
            if ultimate_ans != '':
                self.final_solution = ultimate_ans
            self.players.append(ultimate_judge)

        self._log()
        return False


# Module-level cache of the reasoning structure produced by the
# SELECT -> ADAPT -> IMPLEMENT stages. It is built by the first
# GSM8K_ZEROSHOT_SELFDISCOVER.run() call and reused by every later call in the
# same process.
# NOTE(review): the cache is not keyed by dataset; confirm a process never
# mixes datasets.
reasoning_plan = ""
class GSM8K_ZEROSHOT_SELFDISCOVER(ZEROSHOT_FEEDBACK):
    """Self-Discover style runner: build a reasoning structure once, then
    answer each question by following it."""

    def __init__(self, models_tokenizers,question, answer,example,config) -> None:
        super().__init__(models_tokenizers,question, answer,example,config)

    def _self_discover_stage(self, name, prompt, **completion_kwargs):
        """Run one single-turn stage with a fresh player.

        Creates a player called ``name``, sends ``prompt`` as the user
        message, stores the completion as the assistant message, appends the
        player to ``self.players`` and returns the completion.
        """
        agent = self._create_player(name)
        agent.add_user_message(prompt)
        response = agent.get_completion(**completion_kwargs)
        agent.add_assistant_message(response)
        self.players.append(agent)
        return response

    def run(self):
        """Answer ``self.question`` using the cached reasoning structure.

        When the module-level ``reasoning_plan`` is still empty, the three
        discovery stages run first (temperature 0, one sample each) and their
        final output is cached. The EXECUTE stage always runs.

        Returns:
            Always ``False``.
        """
        global reasoning_plan
        self._init_models()
        self.answer_value = None
        if reasoning_plan == "":
            task_examples = SELF_DISCOVER_TASKEXAMPLES[self.configs["dataset"]]

            # SELECT: choose relevant modules from the full module list.
            selected_modules = self._self_discover_stage(
                "SELECT",
                SELECT.format(reasoning_modules = "\n".join(reasoning_modules),task_examples=task_examples),
                temperature=0, n=1,
            )

            # ADAPT: rephrase the selected modules for this task.
            adapted_modules = self._self_discover_stage(
                "ADAPT",
                ADAPT.format(reasoning_modules = selected_modules,task_examples=task_examples),
                temperature=0, n=1,
            )

            # IMPLEMENT: build the reasoning structure from the ADAPT output.
            # The stage was previously fed the raw module list again, which
            # threw the SELECT and ADAPT results away.
            reasoning_plan = self._self_discover_stage(
                "IMPLEMENT",
                IMPLEMENT.format(reasoning_modules = adapted_modules,task_examples=task_examples),
                temperature=0, n=1,
            )

        question = self.question
        self.answer_type=None

        # EXECUTE: solve the question by following the reasoning structure.
        self._self_discover_stage(
            "agent0",
            EXECUTE.format(question = question,reasoning_structure=reasoning_plan),
        )

        self._log()
        return False




class GSM8K_ZEROSHOT_FEEDBACK(ZEROSHOT_FEEDBACK):
    """Runner for the single-agent and feedback strategies.

    ``run`` dispatches on ``self.strategy``:

    * ``"ZeroShot_Feedback_JID"`` - alternate chain-of-thought and feedback.
    * ``"ZeroShotReflexion"`` - propose, reflect, re-propose.
    * ``"ZeroShot_CoT"`` - sample chain-of-thought answers, optionally
      followed by one evaluation per answer.
    * ``"CoT_Evaluate"`` - sample chain-of-thought answers, then evaluate
      each one.
    * ``"CoT_Already_Evaluate"`` - evaluate answers supplied in
      ``self.example``.
    * ``"Evaluate"`` - evaluate the given question/answer pair.
    """

    def __init__(self, models_tokenizers,question, answer,example,config) -> None:
        super().__init__(models_tokenizers,question, answer,example,config)

    def _generate_cot_samples(self, cot_template, question):
        """Sample chain-of-thought responses from ``self.players[0]``.

        Shared by the ``ZeroShot_CoT`` and ``CoT_Evaluate`` strategies. Sets
        ``self.answer_value``, initialises the CoT players, sends the prompt
        (as system + user messages when ``use_system`` is configured), and
        records parsed and raw answers.

        Returns:
            The value returned by ``get_completion()``.
        """
        use_system = self.configs["use_system"] if "use_system" in self.configs else False
        self.answer_value = parse_pred_answer(self.configs["dataset"], self.answer)
        self._init_CoT_players()
        assert self.num_rounds == self.player_config["n"]
        cot_prompt = cot_template.format(question = question)
        if use_system:
            self.players[0].add_system_message(cot_template)
            self.players[0].add_user_message(f'Question: {question}\n')
        else:
            self.players[0].add_user_message(cot_prompt)
        cot_responses = self.players[0].get_completion()
        self.players[0].add_assistant_message(cot_responses)
        # run() only assigns cot_resulst_json when use_json is enabled, so
        # default to False instead of risking an AttributeError.
        parsed_answers = parse_pred_answer(
            self.configs["dataset"],
            cot_responses,
            use_json_format=getattr(self, "cot_resulst_json", False),
            json_key="Answer",
        )
        self.all_solutions.append(parsed_answers)
        self.all_raw_solutions.append(cot_responses)
        return cot_responses

    def run(self):
        """Execute the strategy named by ``self.strategy``.

        Results are recorded on ``self`` (``all_solutions``,
        ``all_raw_solutions``, ``all_values``, ``all_feedbacks``,
        ``players``, ``answer_value``) depending on the strategy; an
        unrecognised strategy only triggers ``self._log()``.

        Returns:
            Always ``False``.
        """
        # Local import: used to parse evaluator output without eval().
        import ast

        question = self.question; answer = self.answer
        self.answer_type=None
        use_llama_template = self.configs["use_llama_template"] if "use_llama_template" in self.configs else False

        # Default chain-of-thought template. It stays unbound when
        # prompt_strategy is non-empty, is not a TheoremQA key and use_json
        # is off; strategies that need it then fail on first use.
        if self.prompt_strategy == "":
            cot_template = COT
            if use_llama_template:
                cot_template = COT_LLAMA
        elif self.prompt_strategy in THEOREMQA_PROMPT_TEMPLATES:
            cot_template = THEOREMQA_PROMPT_TEMPLATES[self.prompt_strategy]
            if use_llama_template:
                cot_template = THEOREMQA_PROMPT_TEMPLATES[self.prompt_strategy+"_LLAMA"]
        if "use_json" in self.configs:
            if self.configs["use_json"] == True:
                self.cot_resulst_json = True
                cot_template = COT_JSON

        if self.strategy == "ZeroShot_Feedback_JID":
            self.answer_value = parse_pred_answer(self.configs["dataset"], self.answer)
            self._init_players()
            for i in range(self.num_rounds):
                # Even-indexed players answer, odd-indexed players give feedback.
                solver = self.players[i*2]
                solver.add_user_message(cot_template.format(question = question))
                cot_response = solver.get_completion()
                solver.add_assistant_message(cot_response)
                parsed_answer = parse_pred_answer(self.configs["dataset"], cot_response)
                self.all_solutions.append(parsed_answer)
                self.all_raw_solutions.append(cot_response)

                critic = self.players[i*2+1]
                critic.add_user_message(JID_FEEDBACK.format(question = question,answer=cot_response))
                reflect_response = critic.get_completion()
                critic.add_assistant_message(reflect_response)
                parsed_answer = parse_pred_answer(self.configs["dataset"], reflect_response)
                self.all_solutions.append(parsed_answer)
                self.all_raw_solutions.append(reflect_response)

        elif self.strategy == "ZeroShotReflexion":
            nooracle = self.configs["nooracle"] if "nooracle" in self.configs else False

            # Each template entry is indexed as (propose, reflect, combo-propose).
            if self.prompt_strategy in THEOREMQA_PROMPT_TEMPLATES:
                template_table = THEOREMQA_PROMPT_TEMPLATES
            else:
                template_table = REFLEXION_PROMPT_TEMPLATES
            template_entry = template_table[self.prompt_strategy]
            if use_llama_template:
                template_entry = template_table[self.prompt_strategy+"_LLAMA"]
            propose_template = template_entry[0]
            reflect_template = template_entry[1]
            combo_propose_template = template_entry[2]
            if nooracle:
                reflect_template = REFLEXION_COT_REFLECT_nooracle

            self.answer_value = parse_pred_answer(self.configs["dataset"], self.answer)
            self._init_models()
            self.reflections = []
            last_trial = self.configs["last_trial"]
            few_shot = self.configs["few_shot"]
            cheat = self.configs["cheat"]
            self_value = self.configs["self_value"]
            players_by_round = [[] for _ in range(self.num_rounds)]

            # Initial solution.
            proposer = self._create_player("round0_proposer")
            proposer.add_user_message(propose_template.format(question = question))
            cot_response = proposer.get_completion()
            proposer.add_assistant_message(cot_response)
            players_by_round[0].append(proposer)
            parsed_answer = parse_pred_answer(self.configs["dataset"], cot_response)
            self.all_solutions.append(parsed_answer)
            self.all_raw_solutions.append(cot_response)

            def _format_reflexion_reflect(question, prev_answer,cur_reflections,last_trial):
                """Build the reflections text inserted into the next proposal prompt."""
                if last_trial:
                    last_trial_str = LAST_TRIAL_HEADER + f'Question: {question}\n' + prev_answer + '\n(END PREVIOUS TRIAL)\n'
                    reflect_str = REFLECTION_AFTER_LAST_TRIAL_HEADER + 'Reflections:\n- ' + '\n- '.join([r.strip() for r in self.all_feedbacks])
                    return last_trial_str + reflect_str
                # Pick the header here. The original assigned the nooracle
                # header to a local of run() while this helper read the
                # module global, so the override never took effect.
                header = REFLECTION_HEADER_nooracle if nooracle else REFLECTION_HEADER
                return header + 'Reflections:\n- ' + '\n- '.join([r.strip() for r in cur_reflections])

            def _reflexion_reflect(reflector, question,prev_answer, fewhot=False,last_trial=False):
                """Ask ``reflector`` for reflections; return one formatted string per reflection."""
                if fewhot == False:
                    reflector.add_user_message(reflect_template.format(question = question,prev_ans=prev_answer))
                else:
                    reflector.add_user_message(REFLEXION_COT_FEWSHOT_REFLECT.format(question = question,prev_ans=prev_answer))
                reflect_responses = reflector.get_completion()
                reflector.add_assistant_message(reflect_responses)

                reflection_strs = []
                for reflect_response in reflect_responses:
                    cur_reflections = self.all_feedbacks + [reflect_response]
                    reflection_strs.append(_format_reflexion_reflect(question, prev_answer,cur_reflections,last_trial))
                # Only extend the history once every string has been built.
                self.all_feedbacks.extend(reflect_responses)
                return reflection_strs

            for i in range(self.num_rounds-1):
                if cheat:
                    # Oracle mode: stop once the current answer matches gold.
                    gold = parse_pred_answer(self.configs["dataset"], answer)
                    correct = self._check_correctness(gold, cot_response)
                    if correct:
                        break
                if self_value:
                    # Create the evaluator and score the current answer.
                    self.evaluate_config = self.configs["llms"]["llm_agent_evalaute"]
                    self.evaluate_model_name = self.evaluate_config["model_name"]
                    self.llm_agent_evalaute = overall_utils._load_model(self.evaluate_config)
                    evaluator = self._create(self.evaluate_config,self.llm_agent_evalaute,self.evaluate_model_name,f"evaluate{i}")

                    evaluator.add_user_message(CoT_Eval_Analysis["COT_EVAL_EFFECT2"].format(question = question,answer=cot_response[0]))
                    cot_value_response = evaluator.get_completion()
                    evaluator.add_assistant_message(cot_value_response)
                    players_by_round[i].append(evaluator)
                    self.all_values.append(cot_value_response)

                print(f"----round{i+1}----:")
                # New feedback on the latest answer.
                reflector = self._create_feedback(f"round{i+1}_reflector")
                reflect_responses = _reflexion_reflect(reflector,question,cot_response[0], fewhot=few_shot,last_trial=last_trial)
                players_by_round[i+1].append(reflector)

                # One new solution per reflection.
                for j, reflect_response in enumerate(reflect_responses):
                    proposer = self._create_player(f"round{i+1}_proposer{j}")
                    proposer.add_user_message(combo_propose_template.format(reflections=reflect_response,question = question,scratchpad=""))
                    cot_response = proposer.get_completion()
                    proposer.add_assistant_message(cot_response)
                    players_by_round[i+1].append(proposer)
                    parsed_answer = parse_pred_answer(self.configs["dataset"], cot_response)
                    self.all_solutions.append(parsed_answer)
                    self.all_raw_solutions.append(cot_response)
            for round_players in players_by_round:
                self.players.extend(round_players)

        elif self.strategy == "ZeroShot_CoT":
            cot_responses = self._generate_cot_samples(cot_template, question)

            if "self_value" in self.configs:
                if self.configs["self_value"] == True:
                    # Number evaluators by answer position; the original
                    # used a loop variable that is unbound in this branch.
                    for idx, cot_response in enumerate(cot_responses):
                        new_player = self._create_feedback(f"evaluate{idx}")
                        new_player.add_user_message(COT_VALUE.format(question = question,answer=cot_response))
                        cot_value_response = new_player.get_completion()
                        new_player.add_assistant_message(cot_value_response)
                        self.players.append(new_player)
                        try:
                            # literal_eval instead of eval: model output is
                            # untrusted and must not be executed.
                            cot_value_response = ast.literal_eval(cot_value_response)
                            self.all_values.append(cot_value_response["Evaluation"])
                        except Exception:
                            # Best effort: keep whatever we have when the
                            # output is not a dict literal with "Evaluation".
                            self.all_values.append(cot_value_response)

        elif self.strategy == "CoT_Evaluate":
            # dataset: gsm8k
            # Generate CoT first, then evaluate whether each answer is correct.
            # num_agents is the number of evaluators.
            if self.prompt_strategy in THEOREMQA_PROMPT_TEMPLATES:
                cot_template = THEOREMQA_PROMPT_TEMPLATES[self.prompt_strategy][0]
                eval_template = THEOREMQA_PROMPT_TEMPLATES[self.prompt_strategy][1]
            else:
                cot_template = COT
                eval_template = CoT_Eval_Analysis["COT_EVAL_EFFECT2"]

            # There may be multiple answers.
            cot_responses = self._generate_cot_samples(cot_template, question)

            assert self.num_agents == self.feedback_config["n"]
            for idx, candidate in enumerate(cot_responses):
                # Each evaluator is added as a new player.
                the_evaluator = self._create_feedback(f"evaluate{idx}_0")
                the_evaluator.add_user_message(eval_template.format(question = question,answer=candidate))
                evaluate_response = the_evaluator.get_completion()
                the_evaluator.add_assistant_message(evaluate_response)
                self.players.append(the_evaluator)

        elif self.strategy == "CoT_Already_Evaluate":
            self.answer_value = self.example["answer"]
            # dataset: gsm8k
            # CoT already generated; only evaluate whether it is correct.
            # num_agents is the number of evaluators.
            self._init_models()
            prompts = self.example["prompts"]
            answers = self.example["answers"]  # there are multiple answers
            assert self.num_agents == self.feedback_config["n"]
            for idx in range(len(answers)):
                # Replay the stored prompt/answer pair as a player.
                prompt = prompts[idx]
                candidate = answers[idx]
                new_player = self._create_player(f"agent{idx}")
                new_player.add_user_message(prompt)
                new_player.add_assistant_message(candidate)
                self.players.append(new_player)
                the_evaluator = self._create_feedback(f"evaluate{idx}_0")
                the_evaluator.add_user_message(cot_template.format(question = question,answer=candidate))
                evaluate_response = the_evaluator.get_completion()
                the_evaluator.add_assistant_message(evaluate_response)
                self.players.append(the_evaluator)
                self.all_raw_solutions.append(evaluate_response)

        elif self.strategy == "Evaluate":
            # dataset: gsm8k_eval
            # Given a question and an answer, evaluate whether it is correct.
            self._init_models()
            self.answer_value = self.example["label"]

            assert self.num_rounds == self.feedback_config["n"]
            new_player = self._create_feedback("evaluate0")
            if self.configs["add_system"] == True:
                new_player.add_system_message(CoT_Eval_Analysis_SYSTEM)
            new_player.add_user_message(cot_template.format(question = question,answer=answer))
            cot_value_response = new_player.get_completion()
            new_player.add_assistant_message(cot_value_response)
            self.players.append(new_player)
            self.all_raw_solutions.append(cot_value_response)

        self._log()
        return False
