import os
import random
import json
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
# from gpt import GPT
import random
import argparse
import time
import re
import threading
from collections import deque
from Lab_GPT import GPT4V

# Command-line options for the generation run.
parser = argparse.ArgumentParser()
# Path of the input JSON file; its base name also becomes the task name.
parser.add_argument("--data_path", type=str, default="")
# Directory that the per-sample image paths are joined onto.
parser.add_argument("--image_dir", type=str, default="")
# Worker count handed to the ThreadPoolExecutor (threads, despite the name).
parser.add_argument("--num_process", type=int, default=50)
# When set, only the first `limit_num` samples of the input are kept.
parser.add_argument("--limit_num", type=int, default=None)
# Model name passed to the GPT4V client.
parser.add_argument("--gpt_model", type=str, default='gpt-4-vision-preview')
# Number of query attempts made per sample before giving up on it.
parser.add_argument("--query_try_num", type=int, default=1)

args = parser.parse_args()

# Load the samples. The encoding is pinned to UTF-8 so the read does not
# depend on the platform default; every file this script writes is UTF-8.
with open(args.data_path, encoding="utf-8") as f:
    tmpdata = json.load(f)

# Tag every sample with its position in the input file. The id names the
# per-sample result file and is used to skip samples that are already done.
for iid, da in enumerate(tmpdata):
    da['process_id'] = iid

data = tmpdata

# Optionally keep only the first `limit_num` samples (None or 0: keep all).
if args.limit_num:
    data = data[:args.limit_num]

print(f"read data:{len(data)}")



# Scenario name -> instruction text used for single-image samples.
# get_prompt() draws one scenario and inserts its text as task 2 of the query.
QA_prompts = {
    "Standard QA": "You need to generate a question and answer pair based on this image. The question should be simple and colloquial, suitable for testing another model's understanding of this medical image. The response, however, should be professional, demonstrating your understanding of the medical image by providing useful information obtained from the image along with detailed analysis. The reply should provide detailed and rich useful information.",
    "Doctor and Patient's Family": "You need to generate a question and answer pair based on this image. Assume the roles of a doctor and a patient's family member, discussing the results shown in the image. The doctor should explain the imaging findings in simple language and answer any questions the family might have. The family member can inquire about the causes of the illness, its severity, and treatment options. The doctor should patiently ensure the family understands the condition.",
    "Doctor to Doctor": "You need to generate a question and answer pair based on this image. This Q&A pair should be a professional discussion among doctors about the image. Mimic a doctor's tone in both asking and answering questions. The responses should provide useful information derived from the image and offer detailed analysis, ensuring the reply contains detailed and rich useful information.",
    "Intern and Specialist": "You need to generate a question and answer pair based on this image. Assume the tone of an intern doctor to ask questions and a specialist doctor to answer them. The response should provide multiple useful details obtained from the image along with detailed analysis, ensuring the reply is detailed and rich with useful information.",
    "Medical Teacher and Student": "You need to generate a question and answer pair based on this image. Assume the roles of a medical teacher and a student, engaging in an educational interaction around the image. The teacher should pose questions, asking the student to analyze the imaging and suggest possible diagnoses. The student should answer the questions and explain their observations and reasoning.",
    "Senior Doctor and Intern": "You need to generate a question and answer pair based on this image. Assume the roles of a senior doctor and an intern, discussing the image. The senior doctor should ask relevant questions to assess the intern's observational and analytical skills of the imaging. The intern should respond and explain their viewpoint.",
    "Radiologist and Clinician": "You need to generate a question and answer pair based on this image. Assume the roles of a radiologist and a clinician, discussing the radiograph. The radiologist should provide detailed interpretation of the imaging features, while the clinician should propose preliminary diagnoses based on the patient's clinical presentation. Both should collaborate closely, combining imaging and clinical information to make accurate assessments timely.",
    "Doctor and Difficult Patient": "You need to generate a question and answer pair based on this image. Assume the role of a doctor communicating with a patient skeptical about their diagnosis. The patient may pose tricky questions about the imaging results, questioning the doctor's explanations and treatment recommendations. The doctor should use the imaging data to explain the condition in an easily understandable way and answer all of the patient's questions as fully as possible to alleviate their concerns and build trust.",
    "Model Evaluator and AI Model": "You need to generate a question and answer pair based on this image. Assume the role of a member of a quality control team, focusing on assessing the AI model's visual capabilities in handling complex medical images. The team member should inquire about subtle details in the image.",
    "AI-Assisted Doctor": "You need to generate a question and answer pair based on this image. Assume the role of a doctor using an AI model to analyze a medical image to better understand a patient's condition. The doctor should follow privacy rules, not mentioning any other patient information, and discuss only the image. Specific questions should be posed to the AI model about visible structures, abnormal features, and their possible clinical significance. The AI model should analyze the image based on its algorithms and provide a detailed analysis but not make a final clinical diagnosis. The doctor will use the information provided by the AI model to aid their diagnostic decision-making process.",
    "AI Model Assisting Patient": "You need to generate a question and answer pair based on this image. Assume the role of an AI model interacting with a patient who has questions about visible contents in their medical image. The patient may be curious or confused about certain structures or markings on the image and seek clear explanations. The AI model should explain specific image details such as tissue density, shape, or any abnormal areas' potential significance in a way that avoids too much medical jargon and maintains simplicity. The AI model's responses should aim to provide educational information to help the patient better understand their imaging results, but it should emphasize that final interpretations and diagnoses must be performed by a professional doctor."
}

# Scenario name -> instruction text used for multi-image samples (same
# scenario names as QA_prompts, worded for several images).
QA_multi_images_prompts = {
    "Standard QA": "You need to generate a question and answer pair based on these images. The question should be simple and colloquial, suitable for testing another model's understanding of these medical images. The response, however, should be professional, demonstrating your understanding of the medical images by providing useful information obtained from the images along with detailed analysis. The reply should provide detailed and rich useful information.",
    "Doctor and Patient's Family": "You need to generate a question and answer pair based on these images. Assume the roles of a doctor and a patient's family member, discussing the results shown in the images. The doctor should explain the imaging findings in simple language and answer any questions the family might have. The family member can inquire about the causes of the illness, its severity, and treatment options. The doctor should patiently ensure the family understands the condition.",
    "Doctor to Doctor": "You need to generate a question and answer pair based on these images. This Q&A pair should be a professional discussion among doctors about the images. Mimic a doctor's tone in both asking and answering questions. The responses should provide useful information derived from the images and offer detailed analysis, ensuring the reply contains detailed and rich useful information.",
    "Intern and Specialist": "You need to generate a question and answer pair based on these images. Assume the tone of an intern doctor to ask questions and a specialist doctor to answer them. The response should provide multiple useful details obtained from the images along with detailed analysis, ensuring the reply is detailed and rich with useful information.",
    "Medical Teacher and Student": "You need to generate a question and answer pair based on these images. Assume the roles of a medical teacher and a student, engaging in an educational interaction around the images. The teacher should pose questions, asking the student to analyze the imaging and suggest possible diagnoses. The student should answer the questions and explain their observations and reasoning.",
    "Senior Doctor and Intern": "You need to generate a question and answer pair based on these images. Assume the roles of a senior doctor and an intern, discussing the images. The senior doctor should ask relevant questions to assess the intern's observational and analytical skills of the imaging. The intern should respond and explain their viewpoint.",
    "Radiologist and Clinician": "You need to generate a question and answer pair based on these images. Assume the roles of a radiologist and a clinician, discussing the radiographs. The radiologist should provide detailed interpretation of the imaging features, while the clinician should propose preliminary diagnoses based on the patient's clinical presentation. Both should collaborate closely, combining imaging and clinical information to make accurate assessments timely.",
    "Doctor and Difficult Patient": "You need to generate a question and answer pair based on these images. Assume the role of a doctor communicating with a patient skeptical about their diagnosis. The patient may pose tricky questions about the imaging results, questioning the doctor's explanations and treatment recommendations. The doctor should use the imaging data to explain the condition in an easily understandable way and answer all of the patient's questions as fully as possible to alleviate their concerns and build trust.",
    "Model Evaluator and AI Model": "You need to generate a question and answer pair based on these images. Assume the role of a member of a quality control team, focusing on assessing the AI model's visual capabilities in handling complex medical images. The team member should inquire about subtle details in the images.",
    "AI-Assisted Doctor": "You need to generate a question and answer pair based on these images. Assume the role of a doctor using an AI model to analyze medical images to better understand a patient's condition. The doctor should follow privacy rules, not mentioning any other patient information, and discuss only the images. Specific questions should be posed to the AI model about visible structures, abnormal features, and their possible clinical significance. The AI model should analyze the images based on its algorithms and provide a detailed analysis but not make a final clinical diagnosis. The doctor will use the information provided by the AI model to aid their diagnostic decision-making process.",
    "AI Model Assisting Patient": "You need to generate a question and answer pair based on these images. Assume the role of an AI model interacting with a patient who has questions about visible contents in their medical images. The patient may be curious or confused about certain structures or markings on the images and seek clear explanations. The AI model should explain specific image details such as tissue density, shape, or any abnormal areas' potential significance in a way that avoids too much medical jargon and maintains simplicity. The AI model's responses should aim to provide educational information to help the patient better understand their imaging results, but it should emphasize that final interpretations and diagnoses must be performed by a professional doctor."
}


# Relative sampling weight of every scenario (currently uniform). The values
# are passed as `weights` to random.choices when a scenario is drawn.
QA_prompts_pick_distribution = {k: 1 for k in QA_prompts}

QA_multi_images_prompts_pick_distribution = {k: 1 for k in QA_multi_images_prompts}

def get_prompt(reference, single_image=True):
    """Build the generation query for one sample.

    A QA scenario is drawn at random, weighted by the matching
    ``*_pick_distribution`` table: from the single-image prompts when
    ``single_image`` is truthy, otherwise from the multi-image prompts. The
    scenario's instruction becomes task 2 of the query.

    Args:
        reference: Context text placed between the ``<reference>`` tags.
        single_image: Selects which scenario table is sampled.

    Returns:
        ``(query_prompt, prompt_name)``: the full query text and the name of
        the scenario that was drawn.
    """
    if single_image:
        scenarios = QA_prompts
        distribution = QA_prompts_pick_distribution
    else:
        scenarios = QA_multi_images_prompts
        distribution = QA_multi_images_prompts_pick_distribution

    prompt_name = random.choices(list(distribution.keys()), weights=distribution.values(), k=1)[0]
    QA_prompt = scenarios[prompt_name]

    # One template serves both cases; only the scenario text differs. The
    # spaces before some line breaks are part of the query text.
    query_prompt = (
        "Please complete the following tasks based on the medical images and reference information provided by me. \n"
        "\n"
        "1. Generate a detailed and professional description (Image_description). The description must reflect your professionalism and provide as many details as possible from the image. The more comprehensive and precise, the better. \n"
        f"2. {QA_prompt} \n"
        "The contextual text is marked by <reference>. You need to refer to it to ensure the accuracy of the content you generate, but do not mention the existence of this reference information when generating data.\n"
        "Your reply must be in JSON format, formatted as\n"
        ' { "Image_description" : ..., "QA-query" : ..., "QA-answer" : ... }\n'
        f"<reference> {reference} </reference>"
    )
    return query_prompt, prompt_name

# The task is named after the input file: its base name without ".json".
task_name = os.path.split(args.data_path)[-1].replace(".json", "")

# Per-sample results are written into this directory.
save_dir = f'tmp_data/{task_name}'
query_try_num = args.query_try_num
if not os.path.exists(save_dir):
    # makedirs also creates the "tmp_data" parent, which is missing on a
    # first run and would make a plain mkdir fail.
    os.makedirs(save_dir, exist_ok=True)
    print("Path created at", save_dir)
else:
    print("Path exists at", save_dir)
# Maximum number of caption characters used as reference text
# (presumably tuned for English captions - confirm).
reference_maxlen = 550
gpt = GPT4V(model_name=args.gpt_model)

def filter_str(wds,input):
    """Tell whether ``input`` is free of every entry of ``wds``.

    Returns ``False`` as soon as one entry of ``wds`` is contained in
    ``input`` and ``True`` otherwise (also when ``wds`` is empty).
    """
    return not any(wd in input for wd in wds)

def is_percent_english(text):
    """Check whether at least 70% of the characters of ``text`` are English letters.

    A character counts when it is alphabetic and its lowercase form is one
    of a-z. Empty or missing text yields ``False``.
    """
    if not text:
        return False

    alphabet = "abcdefghijklmnopqrstuvwxyz"
    english_total = 0
    for ch in text:
        if ch.isalpha() and ch.lower() in alphabet:
            english_total += 1

    ratio = english_total / len(text)
    return ratio >= 0.7

def find_bracket_content(s):
    """Return the text from the first ``{`` through the last ``}`` of ``s``.

    The match is greedy and may span several lines. ``None`` is returned
    when ``s`` holds no ``{`` that is followed by a ``}``.
    """
    found = re.search(r'\{.*\}', s, flags=re.DOTALL)
    return found.group(0) if found else None

# Adding query filtering rules
def get_data_query(d):
    """Parse and validate the stored model response of one sample.

    The raw text under ``d['ChatGPT_response_0']`` is decoded as JSON; when
    it does not start with ``{`` the outermost ``{...}`` span is extracted
    first. The decoded object must hold a caption field (``Caption``,
    ``Detailed_caption`` or ``Image_description``) as well as ``QA-query``
    and ``QA-answer``.

    Args:
        d: Sample dict holding ``ChatGPT_response_0``.

    Returns:
        ``(True, parsed)`` for a valid response, ``(False, None)`` otherwise.
        The reason for a rejection is printed.

    Raises:
        KeyError: If ``d`` has no ``ChatGPT_response_0`` entry.
    """
    response = d['ChatGPT_response_0']
    try:
        if response[0] != '{':
            response = find_bracket_content(response)
        da = json.loads(response)

        # Explicit checks instead of assert statements: asserts are removed
        # under `python -O`, which would let invalid responses through.
        caption_keys = ('Caption', 'Detailed_caption', 'Image_description')
        if not any(key in da for key in caption_keys):
            raise ValueError('response has no caption/description field')
        for key in ('QA-query', 'QA-answer'):
            if key not in da:
                raise ValueError(f'response is missing {key!r}')

        return True, da
    except Exception as e:
        # Best effort: any malformed response is reported and rejected.
        print(e)
        return False, None

# Failure counter: incremented when a sample fails, reset to 0 when one
# succeeds; the run is aborted once it exceeds 50.
wrongtime = 0
# Number of samples for which a valid 'gpt4v' result was obtained.
success_time = 0
# Lock that makes updates to the shared global state thread-safe
lock = threading.Lock()
def write_piece_order_data(d):
    """Query the model for one sample and persist the outcome.

    Up to ``query_try_num`` attempts are made. Every attempt draws a fresh
    prompt and sends it together with the sample's image path(s); the first
    parsed reply that contains ``Image_description``, ``QA-query`` and
    ``QA-answer`` is stored under ``d['gpt4v']``. The sample, including the
    last query and raw response, is then written to
    ``<save_dir>/<process_id>.json`` whether or not a usable reply was
    obtained.

    Args:
        d: Sample dict, updated in place. Reads ``process_id``, ``image`` (one
            path or a list of paths, joined onto ``args.image_dir``),
            ``caption`` and the optional ``in_text_mention``.

    Returns:
        Always ``1``.

    Raises:
        AssertionError: Once more than 50 errors have accumulated in
            ``wrongtime``; this is what stops the whole run.
    """
    global wrongtime
    global success_time
    # Resolved before the try block so the error handler can always save.
    save_path = os.path.join(save_dir, str(d['process_id']) + ".json")
    try:
        # A list under 'image' marks a multi-image sample.
        single_image = not isinstance(d['image'], list)
        if single_image:
            image_path = os.path.join(args.image_dir, d['image'])
        else:
            image_path = [os.path.join(args.image_dir, img) for img in d['image']]

        # Reference text: the truncated caption, plus the first in-text
        # mention when the sample has one.
        reference = d['caption'][:reference_maxlen]
        if d.get("in_text_mention") is not None:
            reference += "\n" + d['in_text_mention'][0]["tokens"]

        required_keys = ("Image_description", "QA-query", "QA-answer")
        for _ in range(query_try_num):
            # A new scenario is drawn on every attempt.
            chatgpt_query, QA_scenario = get_prompt(reference, single_image=single_image)
            d['QA_scenario'] = QA_scenario
            d['ChatGPT_query'] = chatgpt_query
            # presumably returns (raw reply text, parsed JSON or None) - confirm
            response_content, json_content = gpt.json_call(chatgpt_query, image_path)
            d['ChatGPT_response_0'] = response_content
            if json_content is not None and all(key in json_content for key in required_keys):
                d['gpt4v'] = json_content
                break

        # Explicit branch instead of an assert: asserts are removed under
        # `python -O`, which would count failed samples as successes.
        succeeded = 'gpt4v' in d
        with lock:
            if succeeded:
                success_time += 1
                wrongtime = 0
            else:
                # NOTE(review): a received-but-unusable reply restarts the
                # failure counter at 1 rather than extending it, so only
                # raised errors accumulate towards the abort threshold.
                # Confirm this is intended.
                wrongtime = 1
        if not succeeded:
            print('no gpt4v', flush=True)

        with open(save_path, mode="w", encoding="utf-8") as fw:
            json.dump(d, fw, ensure_ascii=False, indent=2)

    except Exception as e:
        print(str(e), flush=True)
        # The counter is shared by all worker threads.
        with lock:
            wrongtime += 1
            too_many_failures = wrongtime > 50
        if too_many_failures:
            raise AssertionError('wrong') from e
        # Keep whatever was collected so the failure can be inspected.
        with open(save_path, mode="w", encoding="utf-8") as fw:
            json.dump(d, fw, ensure_ascii=False, indent=2)
    return 1
        
def deduplicate(data,finished):
    """Return the entries of ``data`` whose ``process_id`` is not in ``finished``.

    The order of ``data`` is kept; neither input list is modified.
    """
    done_ids = {item['process_id'] for item in finished}
    return [item for item in data if item['process_id'] not in done_ids]

def merge_files(save_dir):
    """Collect the finished per-sample results stored in ``save_dir``.

    Only ``.json`` files directly inside ``save_dir`` are read. A file counts
    as finished when it decodes to a dict holding ``ChatGPT_response_0``,
    ``ChatGPT_query`` and ``gpt4v``; the first two keys are dropped from the
    returned entry. Unreadable, malformed or unfinished files are skipped.

    Args:
        save_dir: Directory with one JSON file per processed sample.

    Returns:
        List of finished sample dicts; empty when ``save_dir`` does not exist.
    """
    # Only the top level is needed, so take the first os.walk entry instead
    # of walking the whole tree. A missing directory yields no entry at all.
    _, _, filenames = next(os.walk(save_dir), (save_dir, [], []))

    required_keys = ('ChatGPT_response_0', 'ChatGPT_query', 'gpt4v')
    res = []
    for file_name in filenames:
        if not file_name.endswith('.json'):
            continue
        try:
            with open(os.path.join(save_dir, file_name), encoding="utf-8") as f:
                da = json.load(f)
        except (OSError, ValueError):
            # Unreadable or not valid JSON/UTF-8 (e.g. a half-written file).
            continue

        # Explicit checks instead of assert: asserts are removed under
        # `python -O`, which would report failed samples as finished.
        if not isinstance(da, dict) or any(key not in da for key in required_keys):
            continue

        del da['ChatGPT_response_0']
        del da['ChatGPT_query']
        res.append(da)
    return res

# Start running
if not os.path.exists(save_dir):
    os.mkdir(save_dir)
    print("Path created at", save_dir)

# Results already stored in save_dir count as finished, so the run can be
# resumed without querying those samples again.
finished_data = merge_files(save_dir)
print(f'finished_data: {len(finished_data)}')

# Keep only the samples that have no finished result yet.
data = deduplicate(data,finished_data)
print(f"{len(data)} to be processed")
# Process the remaining samples in random order.
random.shuffle(data)

# Value of success_time at the previous report (a count, despite the name).
current_time = 0
# Number of reports written so far.
record_time = 0
# `interval` is given in minutes
def print_counts_periodically(interval, task_name):
    """Report the request throughput every ``interval`` minutes, forever.

    Each report is printed and appended to ``log/<task_name>.log``. It shows
    the success rate over the last interval, the overall rate, the total
    number of successes and the current local time.

    This function never returns, so a thread running it keeps going until
    the process is terminated (or exits, if the thread is a daemon).

    Args:
        interval: Minutes between two reports.
        task_name: Used as the base name of the log file.
    """
    global success_time
    global current_time
    global record_time
    # The log directory may not exist on a first run.
    os.makedirs('log', exist_ok=True)
    # The report text is not ASCII, so the encoding is pinned instead of
    # being left to the platform default.
    with open(f'log/{task_name}.log', 'w', encoding='utf-8') as fw:
        while True:
            time.sleep(interval*60)
            record_time += 1
            logstr = f'在{interval}分钟内 请求成功速度为: {(success_time - current_time)/interval} 次/分钟\n'
            logstr += f'总速度为 {(success_time)/ (interval*record_time) } 次/分钟\n'
            logstr += f'已成功数据量 {success_time} 总时间为:{record_time*interval/60}小时\n'
            # Record the current time
            logstr += f'当前时间为 {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}\n\n'
            print(logstr,flush=True)
            fw.write(logstr)
            fw.flush()
            # Baseline for the next interval's rate.
            current_time = success_time

# The reporter loops forever, so it runs as a daemon thread; a non-daemon
# thread would keep the interpreter alive after all the work is finished.
print_thread = threading.Thread(target=print_counts_periodically, args=(3,task_name), daemon=True)
print_thread.start()

# Fan the samples out over the worker threads; each call writes its own file.
with ThreadPoolExecutor(max_workers=args.num_process) as executor:
    results = list(tqdm(executor.map(write_piece_order_data, data), total=len(data), desc="Processing samples", unit="sample"))

print('finish_')
# Re-read save_dir so results from earlier runs are part of the output too.
finished_data = merge_files(save_dir)
print(f'generate data:{len(finished_data)} from all data {len(data)}')
# Make sure the output directory exists before the merged result is written,
# and pin the encoding because ensure_ascii=False emits non-ASCII text.
os.makedirs('output', exist_ok=True)
with open(f'output/{task_name}.json', 'w', encoding='utf-8') as fw:
    json.dump(finished_data,fw,ensure_ascii=False,indent=2)