import math
import os.path
import re

import pandas as pd
import scipy
import tqdm
import numpy as np

from data.load_eval_df import load_train_df, load_dev_df, load_test_df
from project_root import join_with_root

from mt_metrics_eval import tau_optimization
from mt_metrics_eval import stats

def read_df(k, v):
    """Load all JSON files in ``v`` into one dataframe tagged with ``k``.

    Each path in ``v`` is read with ``pd.read_json`` and the resulting frames
    are concatenated in the given order (the per-file indices are kept as they
    are). Every row then receives ``k`` in a new ``model_tag`` column.

    @param k: Tag written into the ``model_tag`` column of every row.
    @param v: Iterable of JSON file paths to read.
    @return: The concatenated, tagged dataframe.
    """
    frames = []
    for path in v:
        frames.append(pd.read_json(path))

    combined = pd.concat(frames)
    # One tag entry per row, assigned positionally.
    combined["model_tag"] = [k for _ in range(len(combined))]
    return combined


def filter_score(score, format_prompt):
    """Extract the final score from one generated text.

    If ``format_prompt`` contains a digit (see ``num_there``) the output is
    treated as numeric: every number found in ``score`` is collected and the
    last one is returned as a float. Otherwise ``format_prompt`` is used as a
    regular expression on the lower-cased ``score`` and the last match is
    returned unchanged.

    The original comment notes that MQM outputs are handled by a separate MQM
    parser; that parser is not part of this function.

    @param score: The generated text to parse.
    @param format_prompt: Regex string describing the expected answer format.
    @return: A float (numeric formats), the last regex match (text formats), or
        NaN when nothing could be extracted.
    """
    try:
        if num_there(format_prompt):
            # Raw string: "\d" and "\." are not valid string escapes.
            numbers = re.findall(r"[-+]?(?:\d*\.*\d+)", score)
            return float(numbers[-1])
        return re.findall(format_prompt, score.lower())[-1]
    except Exception:
        # Deliberate best effort: no match, a non-string input, an unparsable
        # number or a broken pattern all count as "no score found".
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        return np.nan


def num_there(s):
    """Return True if at least one character of ``s`` is a digit, else False."""
    for character in s:
        if character.isdigit():
            return True
    return False


def apply_tie_correction(x, y):
    """Compute the "acc23" Kendall variant using an optimized tie threshold.

    First ``tau_optimization.tau_optimization`` is run on ``x`` and ``y`` (each
    wrapped as a one-row array) with the ``acc_23`` sufficient statistics. Its
    ``best_tau`` is then passed as ``epsilon`` to ``stats.KendallVariants`` with
    ``variant="acc23"``, and the first element of that result is returned.

    @param x: First sequence of scores.
    @param y: Second sequence of scores.
    @return: The first value returned by ``stats.KendallVariants``.
    """
    # Both inputs are wrapped into arrays holding a single row.
    x_rows = np.array([np.array(x)])
    y_rows = np.array([np.array(y)])
    optimization = tau_optimization.tau_optimization(
        x_rows, y_rows, tau_optimization.TauSufficientStats.acc_23,
    )

    # NOTE(review): the arguments are passed as (y, x) here but as (x, y) to
    # the optimization above - confirm this ordering is intended.
    accuracy, _ = stats.KendallVariants(
        y, x, variant="acc23", epsilon=optimization.best_tau
    )
    return accuracy


def save_corr(a, b):
    """Compute several correlation coefficients between ``a`` and ``b``.

    Each coefficient is computed independently; if one computation fails (for
    example because of NaN inputs with ``nan_policy="raise"``), that coefficient
    is reported as 0 and the others are still computed.

    @param a: First sequence of scores.
    @param b: Second sequence of scores.
    @return: Dict with the keys ``kendall``, ``kendall_tie_corrected``,
        ``pearson`` and ``spearman``.
    """

    def _or_zero(compute):
        # Catch Exception instead of using a bare except, so KeyboardInterrupt
        # and SystemExit still stop a long evaluation run.
        try:
            return compute()
        except Exception:
            return 0

    return {
        "kendall": _or_zero(
            lambda: scipy.stats.kendalltau(a, b, nan_policy="raise").statistic
        ),
        "kendall_tie_corrected": _or_zero(lambda: apply_tie_correction(a, b)),
        "pearson": _or_zero(lambda: scipy.stats.pearsonr(a, b).statistic),
        "spearman": _or_zero(
            lambda: scipy.stats.spearmanr(a, b, nan_policy="raise").statistic
        ),
    }


def scores_to_float(row):
    """Parse every generated text of a row into a score.

    ``row["prompts"]`` and ``row["generated_text"]`` are walked in parallel. Each
    text is parsed with ``filter_score`` using the regex stored under
    ``prompt["format_prompt"]["regex"]``. Known text labels are translated to
    numbers; any other parsed value is kept as returned.

    @param row: Mapping with the entries ``prompts`` and ``generated_text``.
    @return: List with one parsed score per prompt/text pair.
    """
    # Both label vocabularies map onto the same 1 / 3 / 5 scale.
    label_values = {
        "bad": 1, "neutral": 3, "good": 5,
        "catastrophic": 1, "indifferent": 3, "marvelous": 5,
    }
    parsed_scores = (
        filter_score(text, prompt["format_prompt"]["regex"])
        for prompt, text in zip(row["prompts"], row["generated_text"])
    )
    return [label_values.get(parsed, parsed) for parsed in parsed_scores]


def reformat_df(file_paths, outname=None, filler=0, force=False):
    """Load raw result files, parse the scores and fill missing values.

    When ``outname`` is given and ``outputs/cleaned/<outname>.json`` already
    exists, that file is loaded and returned directly unless ``force`` is set.
    Otherwise all files are read, the generated texts are turned into scores
    and NaN scores are replaced by the mean of the same score position within
    their (task, model_tag) group. The scores before filling are kept in the
    ``score_unfilled`` column.

    @param file_paths: Dict mapping a model tag to a list of JSON result files.
    @param outname: Name (without extension) of the cleaned file to read/write.
        Nothing is cached when it is empty or None.
    @param filler: Not used by this function.
    @param force: If true, recompute even when a cleaned file already exists.
    @return: The cleaned dataframe.
    """
    cleaned_path = None
    if outname:
        cleaned_path = join_with_root(f"outputs/cleaned/{outname}.json")
        if os.path.isfile(cleaned_path) and not force:
            return pd.read_json(cleaned_path)

    # One tagged frame per model, stacked into a single dataframe.
    tagged_frames = [read_df(tag, paths) for tag, paths in file_paths.items()]
    df = pd.concat(tagged_frames)

    # Apply the regex filters and translate text labels into numbers.
    df["score"] = df.apply(scores_to_float, axis=1)

    missing_count = df['score'].explode().isna().sum()
    print(f"There are {missing_count} na values at the moment. They will be replaced with their "
          f"avg")

    df["score_unfilled"] = df["score"]

    def fill_with_column_mean(group_scores):
        # Rows are samples and columns are score positions; every NaN is
        # replaced by the NaN-ignoring mean of its column.
        matrix = np.array(group_scores.tolist())
        column_means = np.nanmean(matrix, axis=0)
        nan_positions = np.where(np.isnan(matrix))
        matrix[nan_positions] = np.take(column_means, nan_positions[1])
        return matrix.tolist()

    grouped_scores = df.groupby(["task", "model_tag"])["score"]
    df['score'] = grouped_scores.transform(fill_with_column_mean)

    if outname:
        df.to_json(cleaned_path, orient="records")

    return df


def compute_correlation(df, outname):
    '''
    Computes correlations between the parsed scores and the ground truth for every (task, model_tag) group
    and writes them to outputs/evaluation/<outname>.xlsx and outputs/evaluation/<outname>.json.

    The per-row score and prompt lists of a group are transposed, so one result row is produced per list
    position (presumably one per prompt variant - verify against the data).
    @param df: Dataframe with the columns task, model_tag, score, prompts and GT_Score
    @param outname: The name of the file the results are written to. Do not specify the extension.
    @return: Nothing, the results are written to disk
    '''
    correlation_keys = ("kendall", "kendall_tie_corrected", "pearson", "spearman")

    rows = []
    grouped = df.groupby(["task", "model_tag"], dropna=False)
    for group_key, frame in tqdm.tqdm(grouped):
        # Transpose so that each entry holds the values of one list position across all samples
        score_columns = np.array(frame["score"].tolist()).T.tolist()
        prompt_columns = np.array(frame["prompts"].tolist()).T.tolist()
        references = frame["GT_Score"].tolist()
        print(len(score_columns))

        for column_scores, column_prompts in tqdm.tqdm(zip(score_columns, prompt_columns)):
            correlations = save_corr(column_scores, references)
            # The prompt of the first sample is used to describe the whole column
            first_prompt = column_prompts[0]
            row = {
                "name": group_key,
                "regex": first_prompt["format_prompt"],
                "task_description": first_prompt["task_description"],
                "task": group_key[0],
                "prompt": first_prompt["base_prompt"]["name"],
                "model": group_key[1],
                "count": len(column_scores),
            }
            for key in correlation_keys:
                row[key] = correlations[key]
            rows.append(row)

    table = pd.DataFrame(rows)
    excel_path = join_with_root(f"outputs/evaluation/{outname}.xlsx")
    table.to_excel(excel_path)
    json_path = join_with_root(f"outputs/evaluation/{outname}.json")
    table.to_json(json_path)

def huge_ensemble(df, outname):
    """Correlate an ensemble of all score positions with the ground truth.

    For every (task, model_tag) group the per-row score lists are transposed so
    that each entry holds one score position across all samples. Each of these
    is z-normalized, NaN values are replaced by 0, and the normalized entries
    are averaged per sample. The averaged scores are then correlated with
    ``GT_Score`` and the resulting table is printed.

    @param df: Dataframe with the columns task, model_tag, score and GT_Score.
    @param outname: Currently unused; writing the results to disk is disabled.
    @return: Nothing, the result table is printed.
    """
    results = []
    for name, group in tqdm.tqdm(df.groupby(["task", "model_tag"], dropna=False)):
        scores = np.array(group["score"].tolist()).T.tolist()

        # Missing entries count as 0 before normalization.
        scores = [[a if a is not None else 0 for a in s] for s in scores]
        scores = [scipy.stats.zscore(s, axis=None) for s in scores]
        # zscore yields NaN e.g. for constant input; treat those as 0.
        scores = [[a if not np.isnan(a) else 0 for a in s] for s in scores]
        # Average over the score positions, leaving one value per sample.
        scores = np.mean(scores, axis=0)

        gt_scores = group["GT_Score"].tolist()
        corrs = save_corr(scores, gt_scores)
        # Save the correlations together with the group they belong to
        result = {
            "name": name,
            "task": name[0],
            "model": name[1],
            "kendall": corrs["kendall"],
            #"kendall_tie_corrected": corrs["kendall_tie_corrected"],
            "pearson": corrs["pearson"],
            "spearman": corrs["spearman"],
        }
        results.append(result)

    out = pd.DataFrame(results)
    print(out)
    #out.to_excel(join_with_root(f"outputs/evaluation/{outname}.xlsx"))
    #out.to_json(join_with_root(f"outputs/evaluation/{outname}.json"))

# Script entry point. The dictionaries below map a model tag to the list of raw
# result files (shards) produced for one experiment setting. Only the setting
# passed to reformat_df at the bottom is processed; the other dictionaries are
# defined but not used in this block.
if __name__ == '__main__':
    # train_df = load_train_df()
    # dev_df = load_dev_df()
    # test_df = load_test_df()

    # Raw outputs under outputs/raw/train/zero_shot.
    zero_shot_train = {
        "Nous": [join_with_root("outputs/raw/train/zero_shot/slurm_pool_NousResearch_Nous-Hermes-13b_0_250_of_27116_vllm_en_de.json"),
                 join_with_root("outputs/raw/train/zero_shot/slurm_pool_NousResearch_Nous-Hermes-13b_250_500_of_27116_vllm_en_de.json"),
                 join_with_root("outputs/raw/train/zero_shot/slurm_pool_NousResearch_Nous-Hermes-13b_0_250_of_27116_vllm_zh_en.json"),
                 join_with_root("outputs/raw/train/zero_shot/slurm_pool_NousResearch_Nous-Hermes-13b_250_500_of_27116_vllm_zh_en.json"),
                 join_with_root("outputs/raw/train/zero_shot/slurm_pool_NousResearch_Nous-Hermes-13b_0_250_of_27116_vllm_summarization"
                                ".json"),
                 join_with_root("outputs/raw/train/zero_shot/slurm_pool_NousResearch_Nous-Hermes"
                                "-13b_250_322_of_27116_vllm_summarization.json"),
                 ],
        "OpenOrca": [
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B_0_250_of_27116_vllm_en_de.json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B_250_500_of_27116_vllm_en_de.json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B_0_250_of_27116_vllm_zh_en.json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B_250_500_of_27116_vllm_zh_en.json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B_0_250_of_27116_vllm_summarization"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B_250_322_of_27116_vllm_summarization.json")
            ],
        "Platypus70B": [
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_0_50_of_27116_vllm_en_de.json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_50_100_of_27116_vllm_en_de"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_100_150_of_27116_vllm_en_de"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_150_200_of_27116_vllm_en_de"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_200_250_of_27116_vllm_en_de"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_250_300_of_27116_vllm_en_de"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_300_350_of_27116_vllm_en_de"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_350_400_of_27116_vllm_en_de"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_400_450_of_27116_vllm_en_de"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_450_500_of_27116_vllm_en_de"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_0_50_of_27116_vllm_zh_en.json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_50_100_of_27116_vllm_zh_en"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_100_150_of_27116_vllm_zh_en"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_150_200_of_27116_vllm_zh_en"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_200_250_of_27116_vllm_zh_en"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_250_300_of_27116_vllm_zh_en"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_300_350_of_27116_vllm_zh_en"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_350_400_of_27116_vllm_zh_en"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_400_450_of_27116_vllm_zh_en"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_450_500_of_27116_vllm_zh_en"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct"
                           "-GPTQ_0_50_of_27116_vllm_summarization"
                           ".json"),
            join_with_root(
                "outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_50_100_of_27116_vllm_summarization"
                ".json"),
            join_with_root(
                "outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_100_150_of_27116_vllm_summarization"
                ".json"),
            join_with_root(
                "outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_150_200_of_27116_vllm_summarization"
                ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_200_250_of_27116_vllm_summarization"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_250_300_of_27116_vllm_summarization"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_300_350_of_27116_vllm_summarization"
                           ".json"),
        ]
    }

    # Raw outputs under outputs/raw/train/zero_shot_emotion_cot_2nd. This is
    # the setting that is evaluated at the bottom of the script.
    zero_shot_train_emotion = {
        "Nous" : [
            join_with_root("outputs/raw/train/zero_shot_emotion_cot_2nd/slurm_pool_NousResearch_Nous-Hermes"
                                "-13b_0_555_of_1320_vllm_en_de"
                                ".json"),
            join_with_root("outputs/raw/train/zero_shot_emotion_cot_2nd/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_555_of_1320_vllm_zh_en"
                           ".json"),
            join_with_root("outputs/raw/train/zero_shot_emotion_cot_2nd/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_555_of_1320_vllm_summarization"
                           ".json"),

        ],
        "OpenOrca": [
            join_with_root(
                "outputs/raw/train/zero_shot_emotion_cot_2nd/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B_0_555_of_1320_vllm_en_de"
                ".json"),
            join_with_root(
                "outputs/raw/train/zero_shot_emotion_cot_2nd/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B_0_555_of_1320_vllm_zh_en"
                ".json"),
            join_with_root(
                "outputs/raw/train/zero_shot_emotion_cot_2nd/slurm_pool_Open-Orca_OpenOrca-Platypus2"
                "-13B_0_555_of_1320_vllm_summarization"
                ".json"),
        ],
        "Platypus70B": [
            join_with_root(
                "outputs/raw/train/zero_shot_emotion_cot_2nd/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_0_555_of_1320_vllm_en_de.json"),
            join_with_root(
                "outputs/raw/train/zero_shot_emotion_cot_2nd/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_0_555_of_1320_vllm_zh_en"
                ".json"),
            join_with_root(
                "outputs/raw/train/zero_shot_emotion_cot_2nd/slurm_pool_TheBloke_Platypus2-70B-Instruct"
                "-GPTQ_0_555_of_1320_vllm_summarization"
                ".json"),
        ],

    }

    # Raw outputs under outputs/raw/train/few_shot.
    few_shot_train = {
        "Nous": [join_with_root("outputs/raw/train/few_shot/slurm_pool_NousResearch_Nous-Hermes"
                                "-13b_0_250_of_1320_vllm_en_de_None"
                                ".json"),
                 join_with_root("outputs/raw/train/few_shot/slurm_pool_NousResearch_Nous-Hermes-13b_250_500_of_1320_vllm_en_de_None.json"),
                 join_with_root("outputs/raw/train/few_shot/slurm_pool_NousResearch_Nous-Hermes-13b_0_250_of_1320_vllm_zh_en_None.json"),
                 join_with_root("outputs/raw/train/few_shot/slurm_pool_NousResearch_Nous-Hermes-13b_250_500_of_1320_vllm_zh_en_None.json"),
                 join_with_root("outputs/raw/train/few_shot/slurm_pool_NousResearch_Nous-Hermes"
                                "-13b_0_151_of_1320_vllm_summarization_None"
                                ".json"),
                 join_with_root(
                     "outputs/raw/train/few_shot/slurm_pool_NousResearch_Nous-Hermes-13b_151_322_of_1320_vllm_summarization_None.json")
                 ],
        "OpenOrca": [
            join_with_root("outputs/raw/train/few_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B_0_250_of_1320_vllm_en_de_None.json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B_250_500_of_1320_vllm_en_de_None.json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B_0_250_of_1320_vllm_zh_en_None.json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B_250_500_of_1320_vllm_zh_en_None.json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B_0_151_of_1320_vllm_summarization_None"
                           ".json"),
            join_with_root(
                "outputs/raw/train/few_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2"
                "-13B_151_322_of_1320_vllm_summarization_None.json")
        ],
        "Platypus70B": [
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_0_50_of_1320_vllm_en_de_None.json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_50_100_of_1320_vllm_en_de_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_100_150_of_1320_vllm_en_de_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_150_200_of_1320_vllm_en_de_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_200_250_of_1320_vllm_en_de_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_250_300_of_1320_vllm_en_de_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_300_350_of_1320_vllm_en_de_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_350_400_of_1320_vllm_en_de_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_400_450_of_1320_vllm_en_de_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_450_500_of_1320_vllm_en_de_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_0_50_of_1320_vllm_zh_en_None.json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_50_100_of_1320_vllm_zh_en_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_100_150_of_1320_vllm_zh_en_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_150_200_of_1320_vllm_zh_en_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_200_250_of_1320_vllm_zh_en_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_250_300_of_1320_vllm_zh_en_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_300_350_of_1320_vllm_zh_en_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_350_400_of_1320_vllm_zh_en_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_400_450_of_1320_vllm_zh_en_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_450_500_of_1320_vllm_zh_en_None"
                           ".json"),
            join_with_root("outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct"
                           "-GPTQ_0_50_of_1320_vllm_summarization_None"
                           ".json"),
            join_with_root(
                "outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_50_100_of_1320_vllm_summarization_None"
                ".json"),
            join_with_root(
                "outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_100_150_of_1320_vllm_summarization_None"
                ".json"),
            join_with_root(
                "outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_150_200_of_1320_vllm_summarization_None"
                ".json"),
            join_with_root(
                "outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_200_250_of_1320_vllm_summarization_None"
                ".json"),
            join_with_root(
                "outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_250_300_of_1320_vllm_summarization_None"
                ".json"),
            join_with_root(
                "outputs/raw/train/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ_300_322_of_1320_vllm_summarization_None"
                ".json"),
        ]
    }

    # Raw outputs under outputs/raw/dev/zero_shot.
    zero_shot_dev = {
        "Nous": [
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_4000_of_19144_vllm_en_de_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_4000_10000_of_19144_vllm_en_de_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_4000_of_19144_vllm_zh_en_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_4000_8000_of_19144_vllm_zh_en_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_8000_11000_of_19144_vllm_zh_en_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_600_of_19144_vllm_summarization_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_600_1400_of_19144_vllm_summarization_dev.json"),
                 ],
        "OpenOrca": [
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_4000_of_19144_vllm_en_de_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_4000_10000_of_19144_vllm_en_de_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_4000_of_19144_vllm_zh_en_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_4000_8000_of_19144_vllm_zh_en_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_8000_11000_of_19144_vllm_zh_en_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_600_of_19144_vllm_summarization_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_600_1400_of_19144_vllm_summarization_dev.json"),
        ],
        "Platypus70B": [
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_4000_of_19144_vllm_en_de_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_4000_7000_of_19144_vllm_en_de_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_7000_10000_of_19144_vllm_en_de_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_2500_of_19144_vllm_zh_en_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_2500_5500_of_19144_vllm_zh_en_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_5500_8000_of_19144_vllm_zh_en_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_8000_11000_of_19144_vllm_zh_en_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_600_of_19144_vllm_summarization_dev.json"),
            join_with_root("outputs/raw/dev/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_600_1400_of_19144_vllm_summarization_dev.json"),
        ]
    }

    # Raw outputs under outputs/raw/dev/few_shot.
    few_shot_dev = {
        "Nous": [
            join_with_root("outputs/raw/dev/few_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_50000_of_19144_vllm_en_de.json"),
            join_with_root("outputs/raw/dev/few_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_50000_of_19144_vllm_zh_en.json"),
            join_with_root("outputs/raw/dev/few_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_50000_of_19144_vllm_summarization.json"),
        ],
        "OpenOrca": [
            join_with_root("outputs/raw/dev/few_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_50000_of_19144_vllm_en_de.json"),
            join_with_root("outputs/raw/dev/few_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_50000_of_19144_vllm_zh_en.json"),
            join_with_root("outputs/raw/dev/few_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_50000_of_19144_vllm_summarization.json"),
        ],
        "Platypus70B": [
            join_with_root("outputs/raw/dev/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_50000_of_19144_vllm_en_de.json"),
            join_with_root("outputs/raw/dev/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_50000_of_19144_vllm_zh_en.json"),
            join_with_root("outputs/raw/dev/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_50000_of_19144_vllm_summarization.json"),
        ]
    }

    # Raw outputs under outputs/raw/test/zero_shot.
    zero_shot_test = {
        "Nous": [
            join_with_root("outputs/raw/test/zero_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_5000_of_5091_vllm_en_es_test.json"),
            join_with_root("outputs/raw/test/zero_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_5000_of_5091_vllm_en_zh_test.json"),
            join_with_root("outputs/raw/test/zero_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_5000_of_5091_vllm_en_de_test.json"),
            join_with_root("outputs/raw/test/zero_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_5000_of_5091_vllm_summarization_test.json"),
        ],
        "OpenOrca": [
            join_with_root("outputs/raw/test/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_5000_of_5091_vllm_en_es_test.json"),
            join_with_root("outputs/raw/test/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_5000_of_5091_vllm_en_zh_test.json"),
            join_with_root("outputs/raw/test/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_5000_of_5091_vllm_en_de_test.json"),
            join_with_root("outputs/raw/test/zero_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_5000_of_5091_vllm_summarization_test.json"),
        ],
        "Platypus70B": [
            join_with_root("outputs/raw/test/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_5000_of_5091_vllm_en_es_test.json"),
            join_with_root("outputs/raw/test/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_5000_of_5091_vllm_en_zh_test.json"),
            join_with_root("outputs/raw/test/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_5000_of_5091_vllm_en_de_test.json"),
            join_with_root("outputs/raw/test/zero_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_5000_of_5091_vllm_summarization_test.json"),
        ]
    }

    # Raw outputs under outputs/raw/test/few_shot.
    few_shot_test = {
        "Nous": [
            join_with_root("outputs/raw/test/few_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_50000_of_5091_vllm_en_de.json"),
            join_with_root("outputs/raw/test/few_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_50000_of_5091_vllm_en_zh.json"),
            join_with_root("outputs/raw/test/few_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_50000_of_5091_vllm_en_es.json"),
            join_with_root("outputs/raw/test/few_shot/slurm_pool_NousResearch_Nous-Hermes"
                           "-13b_0_50000_of_5091_vllm_summarization.json"),
        ],
        "OpenOrca": [
            join_with_root("outputs/raw/test/few_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_50000_of_5091_vllm_en_de.json"),
            join_with_root("outputs/raw/test/few_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_50000_of_5091_vllm_en_zh.json"),
            join_with_root("outputs/raw/test/few_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_50000_of_5091_vllm_en_es.json"),
            join_with_root("outputs/raw/test/few_shot/slurm_pool_Open-Orca_OpenOrca-Platypus2-13B"
                           "_0_50000_of_5091_vllm_summarization.json"),
        ],
        "Platypus70B": [
            join_with_root("outputs/raw/test/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_50000_of_5091_vllm_en_de.json"),
            join_with_root("outputs/raw/test/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_50000_of_5091_vllm_en_zh.json"),
            join_with_root("outputs/raw/test/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_50000_of_5091_vllm_en_es.json"),
            join_with_root("outputs/raw/test/few_shot/slurm_pool_TheBloke_Platypus2-70B-Instruct-GPTQ"
                           "_0_50000_of_5091_vllm_summarization.json"),
        ]
    }


    # Clean the selected setting. force=True makes reformat_df recompute the
    # cleaned file even if outputs/cleaned/<outname>.json already exists.
    df = reformat_df(zero_shot_train_emotion, outname="cleaned_zero_shot_train_emotion", force=True)
    #huge_ensemble(df, "blah")
    #print(df['score_unfilled'].explode().isna().sum())
    # Write the correlation table for that setting to outputs/evaluation.
    compute_correlation(df, outname="corr_zero_shot_train_emotion_avg")