# Build the Bengali qualification-test text file and its CSV answer sheet from a JSONL file of annotated biographies.
from tqdm import tqdm
import json
import ast
import re
import csv
import os
# Run configuration. These values are interpolated into the output directory
# and the output file names further down in the script.
stage, sub_stage = "stage_3", "regenerate"
language_model, annotator = "gpt4", "1st"
# Question #0 prompt (relevant / irrelevant / abstain) that is appended to the
# generated text right after each biography paragraph.
# NOTE: the literal is emitted verbatim, including the trailing spaces after
# option C, so whitespace inside it is part of the output.
biography_quick_review = """
- Question #0: Is the given biography relevant, irrelevant to the corresponding figure or an abstain generation?
    A. Relevant.
    B. Irrelevant.
    C. Abstain.      
"""
# Per-fact question block (Questions #1 to #5) that is appended to the
# generated text after every fact. The leading indentation inside the literal
# is part of the emitted layout and must not be reformatted.
question_to_write = """
            Question #1: Is the given fact properly broken-down from the original sentence? Refer to instructions within Step 2.4 of the provided rule.
                A. Yes.
                B. No. Need split.
                C. No. Need merge.
                D. No. Need edit (not aligned with the sentence).
            Question #2: If the given is not good (the answer is not "Yes"), provide your explanation and edit. 
                Answer:
            Question #3: Is the fact factually correct according to (supported by) Wikipedia? Refer to instructions within Step 2.1 and Tips of the provided rule.
                A. Supported.
                B. Not-supported.
                C. Irrelevant.
            Question #4: Provide supporting evidences of your Question #3's answer or edit if the is not supported. Refer to instructions within Step 2.2, 2.3 and Tips of the provided rule.
                Answer:
            Question #5: If the answer for question #3 is non-supported, do you think it is because the Wikipedia source in the language has limited coverage (no evidences found)? In other words, do you think the Wikipedia page is too short to evaluate this biography?
                A. Yes. I think the Wikipage has limited information.
                B. No. The fact is clearly unsupported based on the Wikipage or very unlikely to be true (based on your intuition and knowledge).
"""

# Opening part of the generated test file: title banner, introduction for the
# annotators, and a worked example showing the layout of one biography entry.
# The per-biography sections are appended to this string later in the script.
# NOTE(review): the introduction says "4 questions are given" per fact, while
# the example below lists Questions #1 to #5 -- confirm which is intended.
data_to_write = """--------------------Qualification test - Bengali--------------------
--------------------
--------------------
--------------------INTRODUCTION--------------------
- You will be given paragraphs of people's biographies generated by a language model (Gemini or GPT4), their internal sentences and facts embedded within each sentence. Make each fact factual validation based on Wikipedia. Please read the attached rule thoroughly before getting started.

- Rule link: https://docs.google.com/document/d/1UoaaaN4BDUb6w_8MFE9NQ9PPYrAoOFaT1hWoakEzP68/edit?usp=sharing.

- An answer sheet is provided, please complete the test through the answer sheet.

- Each biography contains a number of sentences, which also consist of multiple facts embedded internally. For each fact, 4 questions are given, read the rule on how to answer each properly following pre-defined criteria.

- Provided biographies generated by Gemini/GPT4 could be inaccurate, controversial or even annoying. We apologise for any such cases and sincerely appreciate your understanding.

------------EXAMPLE------------
- Figure: ...
- Wikipage link: ...
 
- Read the below biography paragraph to get familiarized with the topic (Step 1).

- Biography: ...

- Question #0: Is the given biography relevant, irrelevant to the corresponding figure or an abstain generation?
    A. Relevant.
    B. Irrelevant.
    C. Abstain.

- In the following, a number of sentences within the above biography paragraph will be given. Each sentence is paired with a series of facts embedded in the sentence. You will answer two questions per fact. The first is about assess facts based on its original sentence. The second is about factually validating facts based on Wikipedia. Please consult the provided rule extensively. If the sentence is irrelevant, then there is no need to assess the fact and label all of them as irrlevant.

1. SENTENCE: ...
	- 1.a. FACT: ...
            Question #1: Is the given fact properly broken-down from the original sentence? Refer to instructions within Step 2.4 of the provided rule.
                A. Yes.
                B. No. Need split.
                C. No. Need merge.
                D. No. Need edit (not aligned with the sentence).
            Question #2: If the given is not good (the answer is not "Yes"), provide your explanation and edit. 
                Answer:
            Question #3: Is the fact factually correct according to (supported by) Wikipedia? Refer to instructions within Step 2.1 and Tips of the provided rule.
                A. Supported.
                B. Not-supported.
                C. Irrelevant.
            Question #4: Provide supporting evidences of your Question #3's answer or edit if the is not supported. Refer to instructions within Step 2.2, 2.3 and Tips of the provided rule.
                Answer:
            Question #5: If the answer for question #3 is non-supported, do you think it is because the Wikipedia source in the language has limited coverage (no evidences found)? In other words, do you think the Wikipedia page is too short to evaluate this biography?
                A. Yes. I think the Wikipage has limited information.
                B. No. The fact is clearly unsupported based on the Wikipage or very unlikely to be true (based on your intuition and knowledge).
------------END OF EXAMPLE------------
--------------------END OF INTRODUCTION--------------------
"""
# Rows of the CSV answer sheet, filled in while the biographies are read.
data_to_write_csv = []
# Letters used to label the facts of one sentence ("1.a", "1.b", ...).
# The 24th entry used to be a duplicated "h"; it has to be "x".
alphab = list("abcdefghijklmnopqrstuvwxyz")


def _fact_label(index):
    """Return the letter label for the fact at 0-based position ``index``.

    Labels run a..z and then continue spreadsheet-style (aa, ab, ...), so a
    sentence holding more than 26 facts no longer raises ``IndexError``.
    """
    label = ""
    index += 1
    while index > 0:
        index, remainder = divmod(index - 1, len(alphab))
        label = alphab[remainder] + label
    return label


# open() does not expand "~", so the home directory is resolved explicitly.
json_bengali_path = os.path.expanduser("~/FActScore/data/to_annotate_data/bn/task/stage_3/gpt4_regenerate_have_facts.jsonl")
# Text fragments are collected in a list and joined once at the end instead of
# growing one large string with repeated "+=".
text_parts = []
# The input holds Bengali text, so the encoding is fixed rather than left to
# the platform default.
with open(json_bengali_path, encoding="utf-8") as f:
    for line in tqdm(f):
        # Tolerate blank lines (e.g. a trailing newline) in the JSONL file.
        if not line.strip():
            continue
        dp = json.loads(line)
        topic = dp["topic"]

        # Answer-sheet header rows for this biography.
        data_to_write_csv.append(["Figure: " + topic, "", "", "", "", ""])
        data_to_write_csv.append(["Biography quick review", "Answer of Question #0"])
        data_to_write_csv.append(["Question #0", ""])
        data_to_write_csv.append(["", ""])
        # NOTE(review): Question #1 offers options A-D in the question text,
        # while this header says "(A/B/C)" -- confirm before changing it.
        data_to_write_csv.append(["FACT\\Question", "Question #1 (A/B/C)", "Question #2", "Question #3 (A/B/C)", "Question #4", "Question #5 (A/B)", "Comment"])

        # Text section for this biography.
        text_parts.append("\n")
        text_parts.append("--------------------START OF " + topic + "'S BIOGRAPHY--------------------\n")
        text_parts.append("- Query: \"" + dp["input"] + "\"\n")
        text_parts.append("- Figure: " + topic + ".\n")
        text_parts.append("- Wikipage link: " + dp["link"] + ".\n\n")
        text_parts.append("- Read the below biography paragraph to get familiarized with the topic (Step 1).\n\n")
        text_parts.append("- Biography: \"" + dp["output"] + "\"\n\n")
        text_parts.append(biography_quick_review + "\n\n")
        text_parts.append("- In the following, a number of sentences within the above biography paragraph will be given. Each sentence is paired with a series of facts embedded in the sentence. You will answer two questions per fact. The first is about assess facts based on its original sentence. The second is about factually validating facts based on Wikipedia. Please consult the provided rule extensively. If the sentence is irrelevant, then there is no need to assess the fact and label all of them as irrlevant.\n\n")

        # Sentences are numbered from 1; their facts are lettered a, b, c, ...
        for sent_no, sent in enumerate(dp["annotations"], start=1):
            text_parts.append(str(sent_no) + ". SENTENCE: \"" + sent["text"] + "\"\n")
            for j, fact in enumerate(sent["model-atomic-facts"]):
                fact_id = str(sent_no) + "." + _fact_label(j)
                data_to_write_csv.append([fact_id + ". " + fact, "", "", "", "", ""])
                text_parts.append("\t" + fact_id + ". FACT: \"" + fact + "\"\n")
                text_parts.append("\t\t" + question_to_write)

        text_parts.append("--------------------END OF " + topic + "'S BIOGRAPHY--------------------\n")
        # Empty row separating consecutive biographies in the answer sheet.
        data_to_write_csv.append(["", "", "", "", "", ""])

# Append everything gathered above to the introduction text in one step.
data_to_write += "".join(text_parts)
# Directory that receives both generated files. os.makedirs() and open() do
# not expand "~" (they would create and use a literal "./~" directory), so the
# home directory is resolved explicitly.
output_dir = os.path.expanduser(f"~/FActScore/data/to_annotate_data/bn/task/{stage}/{sub_stage}/{annotator}/working_files/")
os.makedirs(output_dir, exist_ok=True)

# NOTE(review): "qualitifcation" is misspelled; the name is kept unchanged in
# case other tooling looks the file up by this exact name -- confirm.
file_path = os.path.join(output_dir, f"qualitifcation_test_Bengali_{language_model}_{annotator}.txt")

# Write the test text. The content is Bengali, so the encoding is set
# explicitly instead of relying on the platform default.
with open(file_path, 'w', encoding="utf-8") as file:
    file.write(data_to_write)

# Print a message indicating that the data has been written
print(f"Data has been written to {file_path}.")
file_name = os.path.join(output_dir, f"answer_sheet_Bengali_{language_model}_{annotator}_{sub_stage}.csv")

# Write the answer sheet. newline='' lets the csv module control line endings.
with open(file_name, mode='w', newline='', encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerows(data_to_write_csv)

print(f"CSV file '{file_name}' created successfully.")