import json

import os
import sys

import nltk
import torch
from argparse import ArgumentParser
from pathlib import Path
from typing import List, Union, Dict

import logging
import openai
import pandas as pd
import re

from dotenv import load_dotenv
from nltk import sent_tokenize, word_tokenize, WordNetLemmatizer
from tenacity import retry, wait_fixed, stop_after_attempt, wait_exponential

from colbert.gpt import GPT, clean_response


def re_extract(prefix: str, target: str, truncate: bool = False) -> str:
    """Pull the value that follows ``<prefix>: `` out of *target*.

    The prefix may optionally be wrapped in single or double quotes inside
    *target* (e.g. ``"Intent": ...``). When the prefix cannot be found, the
    whitespace-stripped *target* (or its first line, see *truncate*) is used
    as the value. Tabs in the value are replaced by spaces and one pair of
    matching surrounding quotes is removed.

    Args:
        prefix: Label to look for. It is first tried as a regular-expression
            fragment (the historical behaviour); if that does not match or
            does not compile, it is tried as literal text so that labels
            containing regex metacharacters, such as ``Expand[Query]``, are
            found as written.
        target: Text to search.
        truncate: When true, only the first line of *target* is considered.

    Returns:
        The extracted (or fallback) value as described above.
    """
    text = target.split("\n")[0].strip() if truncate else target.strip()

    # Regex interpretation first keeps results identical for every input that
    # matched before; the escaped form is only a fallback.
    labels = [prefix]
    literal = re.escape(prefix)
    if literal != prefix:
        labels.append(literal)

    value = text
    for label in labels:
        try:
            match = re.search(rf"[\'\"]?{label}[\'\"]?: (?P<value>[\w\W]+)", text)
        except re.error:
            # The raw prefix is not a valid regex fragment; try the next form.
            continue
        if match:
            value = match.group("value")
            break

    value = value.replace("\t", " ")

    # Drop one pair of matching surrounding quotes, if present.
    for quote in ('"', "'"):
        if value.startswith(quote) and value.endswith(quote):
            return value[1:-1]
    return value


class PromptGenerator:
    # These examples are for general retrieval.
    msmarco_examples = {
        # these examples are from msmarco
        1: "Example 1:\n"
        "Search Query: walgreens store sales average\n"
        "Document 1: The average Walgreens salary ranges from approximately $15,000 per year for Customer Service Associate / Cashier to $179,900 per year for District Manager. Average Walgreens hourly pay ranges from approximately $7.35 per hour for Laboratory Technician to $68.90 per hour for Pharmacy Manager. Salary information comes from 7,810 data points collected directly from employees, users, and jobs on Indeed.\n"
        "Document 2: th store in 1984, reaching $4 billion in sales in 1987, and $5 billion two years later. Walgreens ended the 1980s with 1,484 stores, $5.3 billion in revenues and $154 million in profits. However, profit margins remained just below 3 percent of sales, and returns on assets of less than 10 percent.\n"
        "Document 3: The number of Walgreen stores has risen from 5,000 in 2005 to more than 8,000 at present. The average square footage per store stood at approximately 10,200 and we forecast the figure to remain constant over our review period. Walgreen earned $303 as average front-end revenue per store square foot in 2012.\n"
        "Important words: [store, walgreens, profit, average]"
        "Intent: "
        "The query asked about the walgreens store. "
        "The sales in the query can be either store's profit or the salary of the labours. "
        "From the first retrieved document, the salary information is the key topic. "
        "However, other two documents mentions about the walgreen's profit and 'profit' is selected as important word. "
        "More over, document 2 mentions the profit by time change and 3 mentions the profit in size of the store."
        "Therefore, the query is looking for the sales of the walgreens store in average by the time and the size of the store.\n"
        "Missing Query terms: [factor, time, store, size]\n",
        2: "Example 2:\n"
        "Search Query: dna in bacteria\n"
        "Document 1: 'Bacterial DNA in Human Genomes. A new study finds strong evidence that bacteria can transfer genes into human genomes, especially in cancer cells. By Ed Yong | June 20, 2013. Pseudomonas, one of the bacteria groups that have transferred genes to human'\n"
        "Document 2: Bacterial DNA – a circular chromosome plus plasmids. The DNA of most bacteria is contained in a single circular molecule, called the bacterial chromosome. The chromosome, along with several proteins and RNA molecules, forms an irregularly shaped structure called the nucleoid. This sits in the cytoplasm of the bacterial cell. In addition to the chromosome, bacteria often contain plasmids – small circular DNA molecules. Bacteria can pick up new plasmids from other bacterial cells (during conjugation) or from the environment.\n"
        "Document 3: · just now. Report Abuse. bacteria's DNA floats freely through out its cytoplasm because bacteria are prokaryotic which means they are a type of cell that has no membrane bound organelles so no nucleus. this is unlike eukaryotic cells (plant and animals cells) in which the DNA is located inside the nucleus. Source(s): my bio class.\n"
        "Important words: [Genomes, chromosome, cytoplasm, nucleus, RNA]"
        "Intent: "
        "The query asked about DNA in bacteria "
        "This query probably looking for either the type of DNAs in bacteria or can DNA be found in the bacteria "
        "From retrieved documents, bacteria has DNA. "
        "The query may look for type of DNAs in the bacteria. "
        "Therefore, the query is looking for biological or chemical substances that can be found in the DNA of the bacteria such as genomes, chromosome or cytoplasm.\n"
        "Missing Query terms: substances, nucleus, genomes\n",
    }
    dataset_specific_examples = {
        # instruction ref: TARI,
        "webis-touche2020": {
            1: "Example 1:\n"
            "Observe[Query]: is wrestling more athletic or competitive\n"
            # True
            "Observe[Doc 1]: Professional wrestlers are better athletes than football players. In closing, as per the rules, I cannot make any arguments this round. I would however like to thank my opponent for a pretty good debate. To end the debate let's look at the following points \n"
            # False
            "Observe[Doc 2]: Professional Wrestling is more dangerous than MMA or boxing. Honestly, I think you werent listening to me. WHETHER OR NOT YOU GET INJURED SAYS NOTHING TO THE DIFFICULTY OF THE SPORT. I rest my case.\n"
            "Think[Intent]: "
            "The query asked about whether the wrestling is more athletic or competitive. "
            "The first document provides the argument that wrestling is more athletic than football. "
            "In addition, the second document also provides the argument about the wrestling but it says wrestling is more dangerous than MMA or boxing. "
            "This search engine takes argumentative sentence as query and the retrieved documents are counterarguments of it. "
            "By taking the information from first document (relevant) and avoiding the content from second document (partially irrelevant), "
            "the query is looking for the argument that wrestling is more athletic than other sports such as football even it is more dangerous than MMA or boxing.\n"
            "Expand[Query]: [other sports, football, dangerous, MMA, boxing]\n",
            2: "Example 2:\n" "Observe[Query]: who have landed on the moon?"
            # True
            "Observe[Doc 1]: I don't believe that US spaceship have ever landed in the moon. Its not my fault that you get confused easily, you know by fact that astronauts need a lunar spacecraft to get to the moon. Now would you like to have a crack and guess how much it would cost to take one more extra person (tourist) to the moon let alone a few more to profit from it? The cost rate of a manned flight per kilo is ridiculous especially when you have to factor in tourists.\n"
            # False
            'Observe[Doc 2]: Colonization of Space, Luna, and Mars in the next 20 years. You said: "You say that like it\'s a fact. Scientists have not been able to agree on how our moon was actually formed and there are several different theories. Until we do a little more research and visit the moon again, there\'s no real way to know for sure." You\'re right that there are several theories, but there are several theories for just about everything. Creationism and geocentricity are both "theories."\n'
            "Think[Intent]: "
            "The query asked about who have landed on the moon. "
            "The first document provides the argument that US spaceship have never landed on the moon. "
            "In addition, the second document also provides the argument about the colonization of the moon. "
            "Therefore, we can know that first directly counter argues the query and second document mentions space and moon which is partially relevant to the query. "
            "This search engine takes argumentative sentence as query and the retrieved documents are counterarguments of it. "
            "By taking the information from first document (relevant) and avoiding the content from second document (partially relevant), "
            "the query is looking for the argument that 'did US spaceship have landed on the moon with Apollo 11.'\n"
            "Expand[Query]: [US, spaceship, Apollo 11, evidence]\n",
        },
        "nfcorpus": {
            1: "Example 1:\n"
            "Observe[Query]: are statins used after cancer diagnosis\n"
            # True
            "Observe[Doc 1]: Statin use after diagnosis of breast cancer and survival: a population-based cohort study. BACKGROUND: Preclinical studies have shown that statins, particularly imvastatin, can prevent growth in breast cancer cell lines and animal models. We investigated whether statins used after breast cancer diagnosis reduced the risk of breast cancer-specific, or all-cause, mortality in a large cohort of breast cancer patients. \n"
            # False
            "Observe[Doc 2]: Significant changes in dietary intake and supplement use after breast cancer diagnosis in a UK multicentre study. The diagnosis of cancer can motivate survivors to alter their lifestyle habits. Healthcare providers need to be aware of what changes patients are likely to make in order to derive more pertinent recommendations; however, few studies have reported pre- and post-diagnostic lifestyle behaviours.\n"
            "Think[Intent]: "
            "The query asked about whether the statins are used after cancer diagnosis. "
            "The first document provides the scientific information about the diagnosis of breast cancer and survival after using statin. "
            "In addition, the second document provides information about the dietary intake and supplement use after breast cancer diagnosis. "
            "Therefore, we can know that first document is more relevant to the query while the second document does not. "
            "This search engine takes query about medical and the retrieved documents provides bio-medical articles. "
            "By taking the information from first document (relevant) and avoiding the content from second document (partially irrelevant), "
            "the query is looking for the information about the use of statins can affect the survival rate of breast cancer with scientific and medical diagnosis.\n"
            "Expand[Query]: [breast cancer, survival, mortality, cohort, effect]\n",
            2: "Example 2:\n" "Observe[Query]: what is the healing meaning of saffron?"
            # True
            "Observe[Doc 1]: Therapy with saffron and the goddess at Thera. This paper presents a new interpretation of a unique Bronze Age (c. 3000-1100 BCE) Aegean wall painting in the building of Xeste 3 at Akrotiri,Thera. Crocus carturightianus and its active principle, saffron, are the primary subjects at Xeste 3. Several lines of evidence suggest that the meaning of these frescoes concerns saffron and healing.\n"
            # False
            "Observe[Doc 2]: American cranberry (Vaccinium macrocarpon) extract affects human prostate cancer cell growth via cell cycle arrest by modulating expression of cell... Prostate cancer is one of the most common cancers in the world, and its prevalence is expected to increase appreciably in the coming decades. As such, more research is necessary to understand the etiology, progression and possible preventative measures to delay or to stop the development of this disease.\n"
            "Think[Intent]: "
            "The query asked about the meaning of saffron in terms of healing.  "
            "The first document provides the information about the therapy with saffron and the goddess at Thera. "
            "However, the second document provides the information about the American cranberry extract affects human prostate cancer cell growth. "
            "Therefore, we can know that first document is more relevant to the query while the second document does not. "
            "This search engine takes query about medical and the retrieved documents provides bio-medical articles. "
            "By taking the information from first document (relevant) and avoiding the content from second document (partially irrelevant), "
            "the query is looking for the information about saffron whether it can affect the healing of human body or not with scientific and medical information.\n"
            "Expand[Query]: [therapy, goddess, therapy, frescoes, health]\n",
        },
        "scidocs": {
            1: "Example 1:\n" "Observe[Query]: what type of presentation is quiz\n"
            # True
            "Observe[Doc 1]: Effects of quiz-style information presentation on user understanding. This paper proposes quiz-style information presentation for interactive systems as a means to improve user understanding in educational tasks. Since the nature of quizzes can highly motivate users to stay voluntarily engaged in the interaction and keep their attention on receiving information, it is expected that information presented as quizzes can be better understood by users. \n"
            # False
            "Observe[Doc 2]: AUTOMATIC GENERATION OF PRESENTATION SLIDES FOR ACADEMIC PAPERS USING INTEGER LINEAR PROGRAMMING. Presentations are one of the most common and effective ways of communicating the overview of a work to the audience. Given a specialized paper, programmed era of introduction slides diminishes the exertion of the moderator and aides in making an organized synopsis of the paper.\n"
            "Think[Intent]: "
            "The query asked about what type of presentation is quiz. "
            "The first document provides the information about the quiz-style information presentation for interactive systems. "
            "However, the second document provides the information about the automatic generation of presentation slides for academic papers. "
            "Therefore, we can know that first document is more relevant to the query while the second document does not. "
            "This search engine takes query about scientific article's title and the retrieved documents provides the scientific articles that is being cited by the article title given from the query. "
            "By taking the information from first document (relevant) and avoiding the content from second document (barely relevant), "
            "the query is looking for the scientific papers that is being cited by the article, the effectiveness of quiz-style information presentation for interactive systems.\n"
            "Expand[Query]: [information, understanding, interactive, engagement, learning outcomes]\n",
            2: "Example 2:\n" "Observe[Query]: what is the role of cns immune cells"
            # True
            "Observe[Doc 1]: High-Dimensional Single-Cell Mapping of Central Nervous System Immune Cells Reveals Distinct Myeloid Subsets in Health, Aging, and Disease. Individual reports suggest that the central nervous system (CNS) contains multiple immune cell types with diverse roles in tissue homeostasis, immune defense, and neurological diseases.\n"
            # False
            "Observe[Doc 2]: Timing to Perfection: The Biology of Central and Peripheral Circadian Clocks. The mammalian circadian system, which is comprised of multiple cellular clocks located in the organs and tissues, orchestrates their regulation in a hierarchical manner throughout the 24\u00a0hr of the day. At the top of the hierarchy are the suprachiasmatic nuclei, which synchronize subordinate organ and tissue clocks using electrical, endocrine, and metabolic signaling pathways that impact the molecular mechanisms of cellular clocks.\n"
            "Think[Intent]: "
            "The query asked about the role of cns immune cells.  "
            "The first document provides the information about the central nervous system (CNS) contains multiple immune cell types with diverse roles in tissue homeostasis, immune defense, and neurological diseases. "
            "However, the second document provides the information about the mammalian circadian system, which is comprised of multiple cellular clocks located in the organs and tissues, orchestrates their regulation in a hierarchical manner throughout the 24\u00a0hr of the day. "
            "Therefore, we can know that first document is more relevant to the query while the second document does not. "
            "This search engine takes query about scientific article's title and the retrieved documents provides the scientific articles that is being cited by the article title given from the query. "
            "By taking the information from first document (relevant) and avoiding the content from second document (barely relevant), "
            "the query is looking for the scientific papers that is being cited by the article, the role of central nervous system (CNS) immune cells.\n"
            "Expand[Query]: [central nervous system, myeloid subsets, aging, disease]\n",
        },
        "fiqa": {
            1: "Example 1:\n" "Observe[Query]: who is mark thiel?\n"
            # True
            "Observe[Doc 1]: God forbid there is some differing point of views on a tech company's board. Politics aside, Thiel is a genius and a visionary - not to mention one of the huge catalysts to not only Facebook's growth, but also a mentor to Zuckerberg as well. \n"
            # False
            "Observe[Doc 2]: Thiel is involved with at least 15 immense internet companies, not to mention his past ownership's. It's easy to draw conclusions from a-b without actually looking into all the details and complexity,  as long as it makes sense for you right?\n"
            "Think[Intent]: "
            "The query asked about who is mark thiel. "
            "The first document provides the information about the Thiel is a genius and a visionary - not to mention one of the huge catalysts to not only Facebook's growth, but also a mentor to Zuckerberg as well. "
            "However, the second document provides the information about the Thiel is involved with at least 15 immense internet companies, not to mention his past ownership's. "
            "Therefore, we can know that first document is more relevant to the query while the second document partially relevant to the question. "
            "This search engine takes query about financial topic and the retrieved documents provides the financial web article paragraph that can answer the query. "
            "By taking the information from first document (relevant) and avoiding the content from second document (partially irrelevant), "
            "the query is looking for the financial web article paragraph that can answer the question, who is mark thiel.\n"
            "Expand[Query]: [technology, company, internet, facebook]\n",
            2: "Example 2:\n" "Observe[Query]: are chicken molecule equal to chicken"
            # False
            'Observe[Doc 1]: No, it\'s not fair usage at all. Maybe meets a legal definition of fair usage, but it\'s not what we\'d consider a ""straight faced"" claim. Any amount of chicken added to a recipe is ""100%"" chicken. A molecule of chicken in a recipe qualifies it to include ""100% chicken."" It\'s tautological. It\'s designed to make you believe the product is more wholesome or free of additives than it actually is. It\'s a psychological scam, like pretty much all packaging and marketing.\n'
            # False
            "Observe[Doc 2]: Timing to Perfection: There is an alternative if something (i.e., chicken) is 100% chicken.  If it's anything other than straight chicken, it obviously isn't 100% chicken. It's only deceptive if you literally don't think about it at all. Obviously a chicken meatball isn't 100% chicken. Then it would just be ground chicken.  Do you have any evidence whatsoever of the intent behind the law, or does it just feel that way to you?\n"
            "Think[Intent]: "
            "The query asked about are chicken molecule equal to chicken.  "
            "The first document provides the information about the fair usage of chicken in the recipe. If 100% chicken is added to the recipe, it is still 100% chicken. "
            "The second document provides the information about the alternative if something (i.e., chicken) is 100% chicken."
            "Therefore, we can know that both documents are relevant to the query. "
            "This search engine takes query about financial topic and the retrieved documents provides the financial web article paragraph that can answer the query. "
            "By taking the information from first document (relevant) and avoiding the content from second document (partially irrelevant), "
            "the query is looking for the financial web article paragraph that can answer the question, does chicken recipe with 100% really only use chicken or does the company uses some other chicken-like molecules?\n"
            "Expand[Query]: [usage, alternative, recipe, psychological scam]\n",
        },
        "trec-covid-v2": {
            1: "Example 1:\n"
            "Observe[Query]: what organisms infest ferrets with coronavirus\n"
            # True
            "Observe[Doc 1]: Coronavirus Infection in Ferrets: Antigen Distribution and Inflammatory Response. Multisystemic granulomatous lesions are the most common finding in ferrets infected by ferret systemic coronavirus (FRSCV). To characterize the inflammatory response developed against this virus, lesions from 4 naturally infected ferrets were examined. \n"
            # False
            "Observe[Doc 2]: SARS virus infection of cats and ferrets. The reservoir of the coronavirus isolated from patients with severe acute respiratory syndrome (SARS)(1,2) is still unknown, but is suspected to have been a wild animal species. Here we show that ferrets (Mustela furo) and domestic cats (Felis domesticus) are susceptible to infection by SARS coronavirus (SCV) and that they can efficiently transmit the virus to previously uninfected animals that are housed with them.\n"
            "Think[Intent]: "
            "The query asked about what organisms infest ferrets with coronavirus. "
            "The first document provides the information about the Coronavirus Infection in Ferrets: Antigen Distribution and Inflammatory Response. "
            "The second document provides the information about the SARS virus infection of cats and ferrets. "
            "Therefore, we can know that first document is more relevant to the query while the second document partially relevant to the question as it talks about SARS not COVID. "
            "This search engine takes query about COVID-19 related topics and the retrieved documents provides the bio-medical literature articles that can answer the query. "
            "By taking the information from first document (relevant) and avoiding the content from second document (barely relevant), "
            "the query is looking for the bio-medical literature articles that can answer the question, "
            "which creatures delivers coronavirus (COVID-19) to ferrets.\n"
            "Expand[Query]: [covid-19, Transmission, Inflammatory, Granulomas, Infection]\n",
            2: "Example 2:\n" "Observe[Query]: what is robot reviewer"
            # False
            "Observe[Doc 1]: Automating Biomedical Evidence Synthesis: RobotReviewer.We present RobotReviewer, an open-source web-based system that uses machine learning and NLP to semi-automate biomedical evidence synthesis, to aid the practice of Evidence-Based Medicine. RobotReviewer processes full-text journal articles (PDFs) describing randomized controlled trials (RCTs).\n"
            # False
            "Observe[Doc 2]: Prioritising references for systematic reviews with RobotAnalyst: A user study. Screening references is a time-consuming step necessary for systematic reviews and guideline development. Previous studies have shown that human effort can be reduced by using machine learning software to prioritise large reference collections such that most of the relevant references are identified before screening is completed.\n"
            "Think[Intent]: "
            "The query asked about what is robot reviewer.  "
            "The first document provides the information about the RobotReviewer, an open-source web-based system that uses machine learning and NLP to semi-automate biomedical evidence synthesis, to aid the practice of Evidence-Based Medicine. "
            "The second document provides the information about the Prioritising references for systematic reviews with RobotAnalyst: A user study. "
            "Therefore, we can know that both documents are relevant to the query. "
            "This search engine takes query about COVID-19 related topics and the retrieved documents provides the bio-medical literature articles that can answer the query. "
            "By taking the information from first document (relevant) and avoiding the content from second document (barely relevant), "
            "the query is looking for the bio-medical literature articles that can answer the question, what is robot reviewer.\n"
            "Expand[Query]: [machine, learning, NLP]\n",
        },
        "scifact": {
            1: "Example 1:\n"
            "Observe[Query]: folic acid benefits and side effects, pregnancy\n"
            # True
            "Observe[Doc 1]: Folic acid supplementation and dietary folate intake, and risk of preeclampsia. Background/Objectives:Folic acid supplementation has been suggested to reduce the risk of preeclampsia. However, results from few epidemiologic studies have been inconclusive. We investigated the hypothesis that folic acid supplementation and dietary folate intake before conception and during pregnancy reduce the risk of preeclampsia.\n"
            # False
            "Observe[Doc 2]: Effect of women's nutrition before and during early pregnancy on maternal and infant outcomes: a systematic review. Current understanding of biologic processes indicates that women's nutritional status before and during early pregnancy may play an important role in determining early developmental processes and ensuring successful pregnancy outcomes.\n"
            "Think[Intent]: "
            "The query asked about folic acid benefits and side effects during pregnancy. "
            "The first document provides the information about the Folic acid supplementation and dietary folate intake, and risk of preeclampsia. "
            "The second document is about women's nutrition before and during early pregnancy on maternal and infant outcomes. "
            "Therefore, we can know that first document is more relevant to the query while the second document is partially relevant to the query. "
            "This search engine takes query about evidence containing abstracts and the retrieved documents provides the scientific claims from pubmed that can verify the claim given by the query is true. "
            "By taking the information from first document (relevant) and avoiding the content from second document (partially irrelevant), "
            "the query is looking for the scientific claims that can support whether the folic acid has benefits or side effects during pregnancy via various research methods such as birth cohort study and systematic review.\n"
            "Expand[Query]: [supplementation, dietary, nutrition, risk, study, review]\n",
            2: "Example 2:\n"
            "Observe[Query]: what are the primary characteristics associated with respiratory and other nonrespiratory dysfunction?"
            # False
            "Observe[Doc 1]: Hospital and 1-year survival of patients admitted to intensive care units with acute exacerbation of chronic obstructive pulmonary disease. OBJECTIVE To describe outcomes and identify variables associated with hospital and 1-year survival for patients admitted to an intensive care unit (ICU) with an acute exacerbation of chronic obstructive pulmonary disease (COPD).\n"
            # False
            "Observe[Doc 2]: Cold-related respiratory symptoms in the general population. INTRODUCTION Cold-related respiratory symptoms are common among northern populations, especially among people suffering from respiratory diseases. However, the prevalence of such symptoms in the general population and the threshold temperatures at which the symptoms start to emerge are poorly known.\n"
            "Think[Intent]: "
            "The query asked about what are the primary characteristics associated with respiratory and other nonrespiratory dysfunction.  "
            "The first document provides the information about the Hospital and 1-year survival of patients admitted to intensive care units with acute exacerbation of chronic obstructive pulmonary disease. "
            "The second document provides the information about the Cold-related respiratory symptoms in the general population. "
            "Therefore, we can know that both documents are relevant to the query. "
            "This search engine takes query about evidence containing abstracts and the retrieved documents provides the scientific claims from pubmed that can verify the claim given by the query is true. "
            "By taking the information from first document (relevant) and avoiding the content from second document (partially irrelevant), "
            "the query is looking for the scientific claims that can support whether the respiratory and other nonrespiratory dysfunction are associated with the primary characteristics.\n"
            "Expand[Query]: [Intensive, chronic, obstructive, pulmonary, disease, symptoms, general, population]\n",
        },
    }

    def __init__(self, dataset_name: str):
        """Create a generator whose history is seeded with the system prompt.

        Args:
            dataset_name: Dataset identifier; stored unchanged on the instance.
        """
        self.dataset_name = dataset_name

        # Populated later by get_prompt().
        self.instruction = None
        self.response_format = None

        # The conversation history always starts with the system message.
        self.conversations = []
        system_prompt = self._get_system_prompt()
        self.append_history(role="system", content=system_prompt)

    @staticmethod
    def __system_status():
        return (
            f"You are an intelligent assistant who can help user what to query with more clear intent."
            f"Solve a query expansion task with interleaving observation, thought, action steps.\n"
            f"Some examples will be given to help you understand the task.\n"
        )

    def __response_format(self):
        """Return the ``Response Format`` line built from ``self.response_format``."""
        template = 'Response Format: "{}"'
        return template.format(self.response_format)

    @staticmethod
    def __restriction():
        return (
            "Restriction1: The response must follow the given response format.\n"
            "Restriction2: Violating restriction 1 is strongly forbidden.\n"
            "Restriction3: Any other explanation or note cannot be added to the response."
        )

    def _get_system_prompt(self):
        """Return the text of the system message (the system-status preamble)."""
        preamble = self.__system_status()
        return preamble

    def __instruction(self):
        """Return the ``Instruction`` line built from ``self.instruction``."""
        return "Instruction: {}".format(self.instruction)

    @staticmethod
    def __input_data(data: Dict[str, str]):
        return "\n".join(
            map(lambda row: f"{row[0].capitalize()}: {row[1]}", data.items())
        )

    def _get_user_prompt(self, data: Dict[str, str]):
        """Build the user message: instruction line, ``###`` separator, input lines.

        Args:
            data: Mapping rendered as ``Key: value`` lines below the separator.
        """
        sections = (self.__instruction(), "###", self.__input_data(data))
        return "\n".join(sections)

    def get_prompt(self, job_type: str, data: Dict[str, str]):
        """Build the two-message (system, user) prompt for *job_type*.

        Looks *job_type* up in ``self.instructions`` and
        ``self.response_formats`` and stores the results on the instance
        before rendering the user message.
        NOTE(review): neither mapping is defined in the visible part of this
        class; presumably supplied elsewhere (e.g. a subclass) -- confirm.

        Args:
            job_type: Key selecting the instruction and response format.
            data: Input fields rendered into the user message.

        Returns:
            A list of two ``{"role": ..., "content": ...}`` dicts.

        Raises:
            AssertionError: If *job_type* is not in ``self.instructions``.
        """
        assert job_type in self.instructions

        # Must be set before the user prompt is rendered, which reads
        # self.instruction.
        self.instruction = self.instructions[job_type]
        self.response_format = self.response_formats[job_type]

        system_message = {"role": "system", "content": self._get_system_prompt()}
        user_message = {"role": "user", "content": self._get_user_prompt(data)}
        return [system_message, user_message]

    def append_history(self, role, content):
        """Append one message to the conversation history.

        Args:
            role: One of ``"user"``, ``"assistant"`` or ``"system"``.
            content: Message body stored under the ``"content"`` key.

        Raises:
            AssertionError: If *role* is not one of the accepted values.
        """
        assert role in ("user", "assistant", "system")
        message = {"role": role, "content": content}
        self.conversations.append(message)
        # Emit a dot on stdout for every appended message.
        print(".", end=" ")

    def get_history(self):
        """Return the conversation history list itself (not a copy)."""
        history = self.conversations
        return history

    def clear_history(self):
        """Drop every message after the first one, in place.

        The first entry (appended by ``__init__`` as the system message) is
        kept; an empty history is left untouched.
        """
        del self.conversations[1:]


class LoggerBase:
    """Attach a file handler (and optionally a stdout handler) to a named logger.

    The wrapped logger is exposed as ``self.logger``. Its level is DEBUG, but
    both handlers only emit INFO and above, and every record is written as the
    bare message followed by a newline.

    Note: ``logging.getLogger`` returns the same logger object for the same
    name, so building two instances with one name attaches handlers twice.
    """

    def __init__(
        self,
        name: str,
        filepath: str,
        console_print: bool = True,
        continuing: bool = False,
    ):
        """Configure the logger.

        Args:
            name: Name handed to ``logging.getLogger``.
            filepath: File that receives the log lines.
            console_print: When true, records are also written to stdout.
            continuing: When true the file is opened in append mode;
                otherwise it is truncated.
        """
        self.logger = logging.getLogger(name)
        self.logger.setLevel(level=logging.DEBUG)

        log_stream_formatter = logging.Formatter(fmt="%(message)s")

        # The encoding is pinned so the output does not depend on the locale;
        # log files that are read back later then decode the same way on every
        # platform.
        datafile_handler = logging.FileHandler(
            filename=filepath,
            mode="a+" if continuing else "w+",
            encoding="utf-8",
        )
        datafile_handler.setFormatter(log_stream_formatter)
        datafile_handler.setLevel(level=logging.INFO)
        self.logger.addHandler(datafile_handler)

        if console_print:
            console_handler = logging.StreamHandler(stream=sys.stdout)
            console_handler.setFormatter(log_stream_formatter)
            console_handler.setLevel(level=logging.INFO)
            self.logger.addHandler(console_handler)


class LoggerQuery(LoggerBase):
    """Writes ``qid<TAB>query`` rows through the logger named "NewQuery"."""

    def __init__(self, filepath: str, continuing: bool):
        """Set up the "NewQuery" logger for ``filepath``; ``continuing`` selects append mode."""
        base_options = {
            "name": "NewQuery",
            "filepath": filepath,
            "continuing": continuing,
        }
        super().__init__(**base_options)

    def log(self, qid, query):
        """Emit one INFO record holding the query id and the query, tab-separated."""
        row = f"{qid}\t{query}"
        self.logger.info(row)


class LoggerIntentQuery(LoggerBase):
    """Writes tab-separated (previous query, docs, intent, new query) rows.

    Uses the logger named "Intent-NewQuery" with console output disabled.
    """

    def __init__(self, filepath: str, continuing: bool):
        """Set up the file-only logger for ``filepath``; ``continuing`` selects append mode."""
        base_options = {
            "name": "Intent-NewQuery",
            "filepath": filepath,
            "console_print": False,
            "continuing": continuing,
        }
        super().__init__(**base_options)

    def log(self, prev_query: str, docs: List[str], intent: str, new_query: str):
        """Emit one INFO record with the four values joined by tabs.

        ``docs`` is rendered with its default string formatting.
        """
        row = f"{prev_query}\t{docs}\t{intent}\t{new_query}"
        self.logger.info(row)


class LoggerEverything(LoggerBase):
    """Writes one JSON object per record, with console output disabled."""

    def __init__(self, filepath: str, continuing: bool, name: str = "Everything"):
        """Set up the file-only logger ``name`` for ``filepath``; ``continuing`` selects append mode."""
        base_options = {
            "name": name,
            "filepath": filepath,
            "console_print": False,
            "continuing": continuing,
        }
        super().__init__(**base_options)

    def log(self, **kwargs):
        """Serialize the keyword arguments with ``json.dumps`` and emit them as one INFO record."""
        payload = json.dumps(kwargs)
        self.logger.info(payload)


# Shared lemmatizer instance used by is_plural.
wnl = WordNetLemmatizer()


def is_plural(word):
    """Report whether ``word`` differs from its noun lemma.

    Args:
        word: The token to check.

    Returns:
        A ``(plural, lemma)`` tuple: ``lemma`` is the result of lemmatizing
        ``word`` as a noun, and ``plural`` is true when that lemma is not
        equal to ``word``.
    """
    lemma = wnl.lemmatize(word, "n")
    # Compare by value: identity (`is`) on strings depends on whether the
    # lemmatizer happens to hand back the very same object, which says nothing
    # reliable about whether the text changed.
    plural = word != lemma
    return plural, lemma


if __name__ == "__main__":
    # Script entry point. For every query in the expansion file that is not
    # already present in the --new_queries output, run a three-step chat with
    # the LLM (few-shot examples -> latent intent -> expansion terms) and log
    # the results to the four output files.
    parser = ArgumentParser(
        description="Obtain tacit intent of the query and refine with the generated intent."
    )

    parser.add_argument("--dataset_name", type=str)

    # Output files.
    parser.add_argument("--new_queries", type=str)
    parser.add_argument("--new_queries_intents", type=str)
    parser.add_argument("--everything", type=str)
    parser.add_argument("--everything_step", type=str)

    # Input files.
    parser.add_argument("--expansion", type=str)
    parser.add_argument("--prf_terms", type=str)

    args = parser.parse_args()

    # ============= CHECK CONTINUE ============= #
    # Query ids already written to --new_queries are skipped on this run.
    done_qids = []
    if os.path.exists(args.new_queries):
        new_queries_df = pd.read_csv(
            args.new_queries, sep="\t", names=["q_id", "q_text"]
        )
        new_queries_df = new_queries_df.astype(str)
        done_qids = new_queries_df["q_id"].values.tolist()
    # Set copy for constant-time membership tests in the resume filter below.
    done_qid_set = set(done_qids)
    # Existing output means the log files are appended to instead of truncated.
    continuing = bool(done_qids)
    print("# ============= CONTINUING ============= #")
    print(f"# ==    from {len(done_qids) + 1}    == #")
    print("# ====================================== #")

    # NOTE(review): torch.load unpickles the file, so --expansion must come
    # from a trusted source.
    exp = torch.load(args.expansion)
    metadata = exp["metadata"]

    # qid -> list of "<title> <text>" strings, one per PRF document.
    docs = {
        meta_qid: [
            f'{d["title"]} {d["text"]}' for d in meta_info["query_prf"]["PRF"]
        ]
        for meta_qid, meta_info in metadata.items()
    }

    # qid -> original query text.
    queries = {
        meta_qid: meta_info["query"] for meta_qid, meta_info in metadata.items()
    }

    qids = [meta_qid for meta_qid in metadata if meta_qid not in done_qid_set]

    with open(args.prf_terms) as f:
        prf_terms_meta = json.load(f)
    # ===================================== #

    # ============= AGENT PREP ============ #
    prompt_generator = PromptGenerator(
        dataset_name=args.dataset_name,
    )
    gpt = GPT()
    # ===================================== #

    # ============ INTERACTION ============ #
    # Make sure the directory of every output file exists.
    for p in [
        args.new_queries,
        args.new_queries_intents,
        args.everything,
        args.everything_step,
    ]:
        Path(p).parent.mkdir(parents=True, exist_ok=True)
    query_logger = LoggerQuery(args.new_queries, continuing=continuing)
    intent_query_logger = LoggerIntentQuery(
        args.new_queries_intents, continuing=continuing
    )
    everything_logger = LoggerEverything(args.everything, continuing=continuing)
    stepbystep_logger = LoggerEverything(
        args.everything_step,
        name="Everything-step",
        continuing=continuing,
    )

    # k only appears in the prompt wording; the number of Observe[Doc] messages
    # is however many documents the expansion file holds for the query.
    k = 3
    # Number of expansion terms requested, and the literal output template
    # shown to the model ("[word1, word2, ...]").
    n_target_words = 5
    target_output = (
        "[" + ", ".join(f"word{i + 1}" for i in range(n_target_words)) + "]"
    )
    # Upper bound on the tokens kept per document.
    max_doc_words = 400

    for idx, qid in enumerate(qids):
        # query + LLM + action / environment stage
        query = queries[qid]
        topk_docs = docs[qid]

        # Not used by the prompts below; the lookups are kept so that a query
        # missing from the PRF-terms file still raises KeyError as before.
        prf_terms = prf_terms_meta[qid]["words"]
        prf_terms_by_doc = prf_terms_meta[qid]["d_words"]

        # few-shot examples
        examples = prompt_generator.dataset_specific_examples[
            prompt_generator.dataset_name
        ]
        prompt_generator.append_history(
            role="user",
            content="Solve a query expansion task with interleaving Observation, Thought, Action steps. \n"
            "Observe[Query, Doc]: read the query, the top-k documents retrieved by the search engine.\n"
            "Think[Intent]: think about the intent of the query. \n"
            "Expand[Query]: extract the terms from given documents to expand the query.\n"
            "###\n"
            "Here are two examples.\n"
            "Example1:\n"
            f"{examples[1]}\n"
            "Example2:\n"
            f"{examples[2]}\n"
            "###\n"
            "Do you understand the task?",
        )
        response = gpt.query(prompt=prompt_generator.get_history())
        prompt_generator.append_history(role="assistant", content=response)

        prompt_generator.append_history(
            role="user",
            content="Now here is the actual task.\n"
            "###\n"
            "A search query and the top-k documents are retrieved by the search engine.\n"
            f"I will provide you the {k} documents in order of retrieval ranks, each rank can be indicated by number identifier in the bracket [].\n"
            f"Observe[Query]: {query}\n",
        )

        for doc_rank, doc_text in enumerate(topk_docs, start=1):
            # Reduce the document length. A sentence that would push the
            # running total past the budget is skipped; later, shorter
            # sentences may still be added.
            sentences: List[str] = sent_tokenize(doc_text)
            word_cnt = 0
            sliced_doc_sents = []
            for sentence in sentences:
                n_words = len(word_tokenize(sentence))
                if word_cnt + n_words > max_doc_words:
                    continue

                word_cnt += n_words
                sliced_doc_sents.append(sentence)

            sliced_doc_text = " ".join(sliced_doc_sents)
            prompt_generator.append_history(
                role="user", content=f"Observe[Doc {doc_rank}] {sliced_doc_text}\n"
            )

        # few-shot intent
        prompt_generator.append_history(
            role="user",
            content="Think the latent intent of the observed query based on the observed query, and documents.\n"
            "Relevant contents from the documents are more considered for deducing the intent and nonrelevant contents from the documents are less considered.\n"
            "Although nonrelevant contents are avoided, you can still use them if you think they are useful as background knowledge.\n"
            "###\n"
            "The intent should be specific as possible.\n"
            "Think[Intent]: {latent intent}\n",
        )
        response = gpt.query(prompt=prompt_generator.get_history())
        prompt_generator.append_history(role="assistant", content=response)
        # The intent is the assistant message that was just appended.
        final_intent = prompt_generator.get_history()[-1]["content"]

        # few-shot query terms
        prompt_generator.append_history(
            role="user",
            content="Expand the observed query by appending missing terms with the intent you have deduced, and the observed documents.\n"
            "Relevant contents are more considered for expanding the term and nonrelevant contents are avoided.\n"
            "Although nonrelevant contents are avoided, you can still use them if you think they are useful as background knowledge.\n"
            "###\n"
            f"Intent: {final_intent}\n"
            "###\n"
            "Only response the expanding terms, do not say any other explain.\n"
            f"Total number of terms to be extracted is {n_target_words}. Do not provide more or less.\n"
            "Follow the format given below.\n"
            f"You must give me {n_target_words} in an array surrounded by square brackets. Other formats, such as list with line change and numbered lines, are not allowed\n"
            f"Expand[Query]: {target_output}\n",
        )

        response = gpt.query(prompt=prompt_generator.get_history())
        prompt_generator.append_history(role="assistant", content=response)

        unclean_query = prompt_generator.conversations[-1]["content"]
        # An answer that lacks the expected markers AND contains an apology is
        # recorded as an empty term list.
        lacks_format = (
            "Expand[Query]" not in unclean_query
            or "[" not in unclean_query
            or "]" not in unclean_query
        )
        apologetic = "apologize" in unclean_query or "sorry" in unclean_query
        if lacks_format and apologetic:
            clean_query = "[]"
        else:
            # Raw string: the backslashes are regex escapes for the literal
            # brackets, not Python string escapes.
            clean_query = re_extract(r"Expand\[Query\]", unclean_query)
            clean_query = f'[{", ".join(clean_response(clean_query))}]'

        # logging
        print(f"{args.dataset_name}\t{idx + 1}/{len(qids)} :: \t", end="")
        query_logger.log(qid, clean_query)
        everything_logger.log(history=prompt_generator.get_history())
        stepbystep_logger.log(original=query, intent=final_intent, prf=clean_query)
        intent_query_logger.log(
            prev_query=query, docs=[""], intent=final_intent, new_query=clean_query
        )

        # Reset the conversation before the next query.
        prompt_generator.clear_history()
    # ===================================== #
