import pandas as pd
import os

from utils import (
    get_average_accuracy_per_model,
    get_original_dataset,
    get_questions_answered_by_all_roleplayed_levels,
    get_student_levels_from_prompt_idx,
)
from constants import (
    RACE,
    CUPA,
    ARC,
    OUTPUT_DATA_DIR,
    GPT_3_5,
    TEST,
    DEV,
)

# Run configuration read by main() to locate and evaluate one stored responses file.
# NOTE(review): RACE / ARC and DEV are imported above but unused here; presumably
# they are the alternative values for DATASET and SPLIT — confirm.
# Dataset identifier: part of the responses directory name, and passed to get_original_dataset().
DATASET = CUPA
# Prompt index: part of the responses filename, and passed to get_student_levels_from_prompt_idx().
PROMPT_IDX = 100
# Model identifier: part of both the responses directory name and the responses filename.
MODEL = GPT_3_5
# Data split: used as a sub-directory of OUTPUT_DATA_DIR.
SPLIT = TEST


def main():
    """Print accuracy statistics for one stored file of model responses.

    The file is selected by the module-level DATASET, PROMPT_IDX, MODEL and
    SPLIT settings. The responses are compared against the original dataset,
    restricted to the question ids returned by
    ``get_questions_answered_by_all_roleplayed_levels``, and the two values
    returned by ``get_average_accuracy_per_model`` are printed to stdout.
    """
    responses_dir = os.path.join(OUTPUT_DATA_DIR, SPLIT, f'{MODEL}_responses_{DATASET}')

    # NOTE(review): this value is never read below; the call is kept in case the
    # helper validates PROMPT_IDX — confirm whether it can be removed.
    _student_levels = get_student_levels_from_prompt_idx(PROMPT_IDX)

    # NOTE(review): the "_a_1" suffix is hard-coded; an earlier comment tied it to a
    # "1+idx" naming used by files written with a previous script — confirm.
    responses_filename = f"{MODEL}_grade_answers_prompt{PROMPT_IDX}_0shot_a_1.csv"
    responses_path = os.path.join(responses_dir, responses_filename)
    responses = pd.read_csv(responses_path)

    original_dataset = get_original_dataset(DATASET)

    # Restrict the evaluation to the questions that are answered by all models.
    common_question_ids = get_questions_answered_by_all_roleplayed_levels(
        [responses],
        original_dataset,
    )

    overall_accuracy, accuracy_per_grade = get_average_accuracy_per_model(
        [responses],
        common_question_ids,
        original_dataset,
    )
    print(overall_accuracy)
    print(accuracy_per_grade)


# Run the evaluation only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
