import pandas as pd
import os
# Names of all entries in the predictions directory (os.listdir returns them
# in arbitrary order); each one is later read as a CSV file.
files = os.listdir("predictions_new_template_500_fixed")

def preprocess_data(df):
    """Clean the five rating columns of *df* in place.

    For each of "Relevance", "Specificity", "Simplicity", "Helpfulness" and
    "Objectivity", every cell is rewritten according to these rules:

    * missing values (NaN / None) become 1;
    * values whose text contains a space become 1;
    * "0" and "0.0" become 1;
    * "1".."5" and "1.0".."5.0" become the matching integer 1..5;
    * any other value is left as its string representation.

    After a column has been cleaned, the label "After" and the column's
    unique values are printed as a quick sanity check.

    Args:
        df: DataFrame containing all five metric columns. It is modified in
            place.

    Returns:
        None.

    Raises:
        KeyError: If one of the metric columns is missing from *df*.
    """
    metrics = ["Relevance", "Specificity", "Simplicity", "Helpfulness", "Objectivity"]

    # Textual spellings (integer-like and float-like) of every accepted score.
    score_lookup = {"0": 1, "0.0": 1}
    for score in range(1, 6):
        score_lookup[str(score)] = score
        score_lookup[f"{score}.0"] = score

    def normalise(value):
        # Missing ratings fall back to the lowest score.
        if pd.isna(value):
            return 1
        text = str(value)
        # Anything containing a space is free text rather than a bare score.
        if " " in text:
            return 1
        # Unrecognised values are kept (as text) so they remain visible in
        # the printed unique values instead of being silently rewritten.
        return score_lookup.get(text, text)

    for metric in metrics:
        # Assign the cleaned column back explicitly: a chained
        # `df[metric].fillna(..., inplace=True)` does not update `df` when
        # pandas Copy-on-Write is active.
        df[metric] = df[metric].map(normalise)
        print("After")
        print(df[metric].unique())


# Column names of interest: the five rating metrics plus score and length statistics.
relevant_attributes = ["Relevance", "Specificity", "Simplicity", "Helpfulness", "Objectivity", "Overall Score", "Length", "Approximate Token Amount", "Amount of Words"]

# Results table keyed by column name; only the "Metrics" column is filled in here.
results = {
    "Metrics": relevant_attributes
}

# human_eval_results.csv is semicolon-separated and uses decimal commas.
human_eval_df = pd.read_csv("human_eval_results.csv", sep=";", decimal=",", index_col="Question Index", encoding='utf8')
# Distinct "Question Index" values, in order of first appearance.
index_list = human_eval_df.index.unique().tolist()

source_dir = "predictions_new_template_500_fixed"
output_dir = "predictions_new_template_500_fixed_50_selected"
# DataFrame.to_csv does not create missing directories, so make sure the
# destination exists before writing into it.
os.makedirs(output_dir, exist_ok=True)

for file in files:
    cur_df = pd.read_csv(os.path.join(source_dir, file))
    # NOTE(review): iloc selects by row position, so this assumes the
    # "Question Index" values are 0-based row positions in every prediction
    # file - confirm against the data.
    new_df = cur_df.iloc[index_list]
    # Strip only the final extension so names containing extra dots
    # (e.g. "model-1.5.csv") are not truncated at the first dot.
    new_file_name = os.path.splitext(file)[0]
    new_df.to_csv(os.path.join(output_dir, f"{new_file_name}.csv"), index=False)
