import pandas as pd
import os
# os.listdir returns entries in arbitrary order; sort them so the columns of
# the output CSV come out in the same order on every run and every machine.
files = sorted(os.listdir("predictions_human"))

def preprocess_data(df):
    metrics = ["Relevance", "Specificity", "Simplicity", "Helpfulness", "Objectivity"]
    for metric in metrics:
        print("After")
        # print(df[metric].unique())
        df[metric].fillna(1, inplace=True)
        df[metric] = df[metric].astype(str)
        df.loc[df[metric].str.contains(" "), metric] = 1
        df.loc[df[metric] == '0', metric] = 1
        df.loc[df[metric] == '1', metric] = 1
        df.loc[df[metric] == '2', metric] = 2
        df.loc[df[metric] == '3', metric] = 3
        df.loc[df[metric] == '4', metric] = 4
        df.loc[df[metric] == '5', metric] = 5
        df.loc[df[metric] == '0.0', metric] = 1
        df.loc[df[metric] == '1.0', metric] = 1
        df.loc[df[metric] == '2.0', metric] = 2
        df.loc[df[metric] == '3.0', metric] = 3
        df.loc[df[metric] == '4.0', metric] = 4
        df.loc[df[metric] == '5.0', metric] = 5
        print(df[metric].unique())

# Columns whose standard deviation is reported for every prediction file; the
# order here is the row order of the output table.
relevant_attributes = ["Relevance", "Specificity", "Simplicity", "Helpfulness", "Objectivity", "Overall Score", "Length", "Approximate Token Amount", "Amount of Words"]

# Columns that may hold the rated answer text, in priority order.
answer_column_candidates = ["prediction", "answer_1", "answer_2", "reference_1", "reference_2"]

# One entry per output column: the "Metrics" label column first, then one
# list of standard deviations per input file.
results = {
    "Metrics": relevant_attributes
}
for file in files:
    cur_df = pd.read_csv(os.path.join("predictions_human", file))
    print(file)
    preprocess_data(cur_df)
    cur_df["Overall Score"] = (
        cur_df["Relevance"]
        + cur_df["Specificity"]
        + cur_df["Simplicity"]
        + cur_df["Helpfulness"]
        + cur_df["Objectivity"]
    )
    columns = cur_df.columns.tolist()
    # Resolve the answer column afresh for every file so a file without any
    # candidate column fails loudly instead of reusing the previous file's
    # choice (or hitting a NameError on the first file).
    answer_col = next(
        (candidate for candidate in answer_column_candidates if candidate in columns),
        None,
    )
    if answer_col is None:
        raise ValueError(
            "No answer column found in " + file
            + "; expected one of " + ", ".join(answer_column_candidates)
        )
    answer_text = cur_df[answer_col]
    cur_df["Length"] = answer_text.str.len()
    # 3.76 is the hard-coded characters-per-token ratio used for the estimate.
    cur_df["Approximate Token Amount"] = cur_df["Length"] / 3.76
    cur_df["Amount of Words"] = answer_text.str.split(" ").str.len()
    result = [round(cur_df[attribute].std(), 2) for attribute in relevant_attributes]
    # splitext strips only the final extension, so dotted names such as
    # "gpt-3.5.csv" keep their full stem and do not collide with each other.
    results[os.path.splitext(file)[0]] = result

result_df = pd.DataFrame.from_dict(results)
# Semicolon separator with a decimal comma, as written by the original export.
result_df.to_csv("eval_results_final_data_500_human_eval_std_2_template_1_2.csv", sep=";", decimal=",")
