# Data to be written to the file
import ast
import csv
import json
import os
import re

from tqdm import tqdm
# Run configuration. These four values are interpolated into the input and
# output paths used further down in this script.
lang: str = "es"  # language code; selects the data sub-directory
stage: str = "sub_task_1"  # task stage; selects the task sub-directory
language_model: str = "gpt4"  # model tag; part of the input/output file names
annotator: str = "2nd"  # annotator tag; part of directory and file names
# Question block appended after every fact in the annotation text file.
# Questions #3-#6 repeat #1/#2 against other sources, so their
# cross-references point at #1 (verdict) and #2 (evidence).
question_to_write = """
            Question #1: Is the fact factually correct according to (supported by) your language Wikipedia? Refer to instructions within Step 2.1 and Tips of the provided rule.
                A. Supported.
                B. Not-supported.
                C. Irrelevant.
            Question #2: Provide supporting evidences of your Question #1's answer or edit if the fact is not supported. Refer to instructions within Step 2.2, 2.3 and Tips of the provided rule.
                Answer:
            Question #3: Is the fact factually correct according to (supported by) English Wikipedia? Similar to question #1 but with different sources.
                A. Supported.
                B. Not-supported.
                C. Irrelevant.
            Question #4: Provide supporting evidences of your Question #3's answer or edit if the fact is not supported. Similar to question #2 but with different sources.
                Answer:
            Question #5: Is the fact factually correct according to (supported by) the Internet? Similar to question #1 but with different sources.
                A. Supported.
                B. Not-supported.
                C. Irrelevant.
            Question #6: Provide supporting evidences of your Question #5's answer or edit if the fact is not supported. Similar to question #2 but with different sources.
                Answer:
"""

# Biography-level question (Question #0). Written as explicit string pieces so
# the trailing spaces after "C. Abstain." are visible and cannot be stripped.
biography_quick_review = (
    "\n"
    "- Question #0: Is the given biography relevant, irrelevant to the corresponding figure or an abstain generation?\n"
    "    A. Relevant.\n"
    "    B. Irrelevant.\n"
    "    C. Abstain.      \n"
)

# Introduction written at the top of the annotation text file. Per-figure
# sections are appended to this string later in the script.
data_to_write = (
    """--------------------Qualification test--------------------
--------------------
--------------------
--------------------INTRODUCTION--------------------
- You will be given paragraphs of people's biographies generated by a language model (Gemini or GPT4), their internal sentences and facts embedded within each sentence. Make each fact factual validation based on Wikipedia. Please read the attached rule thoroughly before getting started.

- Rule link: https://docs.google.com/document/d/1UoaaaN4BDUb6w_8MFE9NQ9PPYrAoOFaT1hWoakEzP68/edit?usp=sharing.

- Provided biographies generated by Gemini/GPT4 could be inaccurate, controversial or even annoying. We apologise for any such cases and sincerely appreciate your understanding.

------------EXAMPLE------------
- Figure: ...
- Wikipage link: ...
- English Wikipage link: ...
"""
    # Whitespace-only separator line, spelled out so editors cannot strip it.
    " \n"
    """- Read the below biography paragraph to get familiarized with the topic (Step 1).

- Biography: ...

1. SENTENCE: ...
\t- 1.a. FACT: ..."""
    # The example reuses the real question block so the two can never drift.
    + question_to_write
    + """------------END OF EXAMPLE------------
--------------------END OF INTRODUCTION--------------------
"""
)
# Rows of the CSV answer sheet; filled while the input records are processed.
data_to_write_csv = []
# Letters used to label the facts of one sentence (1.a, 1.b, ...). Derived from
# a single string so no letter can be duplicated or skipped.
alphab = list("abcdefghijklmnopqrstuvwxyz")

# Read the input records (one JSON object per line) and render each record
# twice: as a text section appended to data_to_write, and as rows appended to
# data_to_write_csv for the answer sheet.
# open() does not expand "~", so the home directory is resolved explicitly.
json_arabic_path = os.path.expanduser(
    f"~/FActScore/data/to_annotate_data/{lang}/task/{stage}/{language_model}_{annotator}_have_facts.jsonl"
)
# Text pieces are collected in a list and joined once at the end instead of
# repeatedly growing the string.
biography_parts = []
# The biographies contain non-ASCII text, so the encoding is stated explicitly
# rather than left to the platform default.
with open(json_arabic_path, encoding="utf-8") as f:
    for line in tqdm(f):
        # Tolerate blank lines (e.g. a trailing newline at the end of the file).
        if not line.strip():
            continue
        dp = json.loads(line)
        topic = dp["topic"]
        query = dp["input"]
        link = dp["link"]
        en_link = dp["en_link"]
        biography = dp["output"]

        data_to_write_csv.append(["Figure: " + topic, "", "", "", "", ""])
        # The backslash is escaped so the header is not an invalid "\Q" escape;
        # the emitted text is unchanged.
        data_to_write_csv.append(["FACT\\Question", "Question #1 (A/B/C)", "Question #2", "Question #3 (A/B/C)", "Question #4", "Question #5 (A/B/C)", "Question #6", "Comment"])

        biography_parts.append("\n")
        biography_parts.append(f"--------------------START OF {topic}'S BIOGRAPHY--------------------\n")
        biography_parts.append(f'- Query: "{query}"\n')
        biography_parts.append(f"- Figure: {topic}.\n")
        biography_parts.append(f"- Wikipage link: {link}.\n\n")
        biography_parts.append(f"- English Wikipage link: {en_link}.\n\n")
        biography_parts.append("- Read the below biography paragraph to get familiarized with the topic (Step 1).\n\n")
        biography_parts.append(f'- Biography: "{biography}"\n\n')

        # Sentences are numbered from 1; a distinct loop variable avoids
        # shadowing any outer counter.
        for sent_no, sent in enumerate(dp["annotations"], start=1):
            text = sent["text"]
            facts = sent["model-atomic-facts"]
            biography_parts.append(f'{sent_no}. SENTENCE: "{text}"\n')
            if not facts:
                # A sentence without extracted facts still gets one CSV row.
                data_to_write_csv.append([f"{sent_no}. {text}", "", "", "", "", ""])
            for j, fact in enumerate(facts):
                label = f"{sent_no}.{alphab[j]}"
                data_to_write_csv.append([f"{label}. {fact}", "", "", "", "", ""])
                biography_parts.append(f"\t{label}. FACT: {fact}\n")
                biography_parts.append("\t\t" + question_to_write)

        biography_parts.append(f"--------------------END OF {topic}'S BIOGRAPHY--------------------\n")
        # Empty row separating figures in the answer sheet.
        data_to_write_csv.append(["", "", "", "", "", ""])

data_to_write += "".join(biography_parts)
# Write the accumulated annotation text to the annotator's working directory.
# open() does not expand "~", so the home directory is resolved explicitly.
file_path = os.path.expanduser(
    f"~/FActScore/data/to_annotate_data/{lang}/task/{stage}/{annotator}/working_files/qualitifcation_test_{language_model}_{annotator}.txt"
)
# Create the output directory if it is missing so the write cannot fail on it.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# The text contains non-ASCII characters, so the encoding is stated explicitly
# rather than left to the platform default.
with open(file_path, "w", encoding="utf-8") as file:
    file.write(data_to_write)

# Report where the text file was written.
print(f"Data has been written to {file_path}.")


# Write the answer-sheet rows to a CSV file in the annotator's working directory.
# open() does not expand "~", so the home directory is resolved explicitly.
file_name = os.path.expanduser(
    f"~/FActScore/data/to_annotate_data/{lang}/task/{stage}/{annotator}/working_files/answer_sheet_{language_model}_{annotator}.csv"
)
# Create the output directory if it is missing so the write cannot fail on it.
os.makedirs(os.path.dirname(file_name), exist_ok=True)

# newline='' lets the csv module control line endings; the encoding is stated
# explicitly because the rows contain non-ASCII text.
with open(file_name, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerows(data_to_write_csv)

print(f"CSV file '{file_name}' created successfully.")