import argparse
import json
import os
import re

import pandas as pd

# Directory containing this script, with symlinks resolved.
thisdir = os.path.dirname(os.path.realpath(__file__))

# Command-line interface. Every option stores a plain string; the selection
# options are restricted to a fixed set of choices.
parser = argparse.ArgumentParser(description="Export StereoSet results.")

# Defaults to the parent of the directory that holds this script.
parser.add_argument(
    "--persistent_dir",
    type=str,
    default=os.path.realpath(os.path.join(thisdir, os.pardir)),
    help="Directory where all persistent data will be stored.",
)

parser.add_argument(
    "--bias_type",
    type=str,
    default="gender",
    choices=["gender", "race", "religion", "profession", "overall"],
    help="Which type of bias to export results for.",
)

parser.add_argument(
    "--split",
    type=str,
    default="dev",
    choices=["dev", "test"],
    help="Which StereoSet split to export results for.",
)

parser.add_argument(
    "--score_type",
    type=str,
    default="likelihood",
    choices=["likelihood"],
    help="Which StereoSet score type to export results for.",
)


# A single pattern covers both experiment ID layouts: the "_t-<bias_type>"
# segment is optional. Compiled once because the parser is called per result.
_EXPERIMENT_ID_PATTERN = re.compile(
    r"stereoset"
    r"_intra-([A-Za-z0-9]+)"
    r"_c-([A-Za-z0-9-]+)"
    r"(?:_t-([A-Za-z-]+))?"
    r"_s-([A-Za-z-]+)"
    r"_d-([A-Za-z]+)"
)


def _parse_experiment_id(experiment_id):
    """Split a StereoSet experiment ID into its components.

    Two layouts are recognised, matched from the start of the string:

    * ``stereoset_intra-<model>_c-<checkpoint>_t-<bias_type>_s-<score>_d-<split>``
    * ``stereoset_intra-<model>_c-<checkpoint>_s-<score>_d-<split>``

    Args:
        experiment_id: The experiment ID string to parse.

    Returns:
        A tuple ``(intrasentence_model, model_name_or_path, bias_type,
        score_type, split)``. ``bias_type`` is ``None`` when the ID has no
        ``_t-`` segment.

    Raises:
        ValueError: If ``experiment_id`` does not start with a recognised
            experiment ID.
    """
    match = _EXPERIMENT_ID_PATTERN.match(experiment_id)
    if match is None:
        # Fail with the offending ID instead of an AttributeError on None.
        raise ValueError(f"Unrecognized StereoSet experiment ID: {experiment_id!r}")

    # Group order equals the return order; the optional bias-type group is
    # None when absent.
    return match.groups()


if __name__ == "__main__":
    # Export the SS and LM scores of the matching StereoSet experiments as a
    # LaTeX table printed to stdout.
    args = parser.parse_args()

    print("Exporting StereoSet results:")
    print(f" - persistent_dir: {args.persistent_dir}")
    print(f" - bias_type: {args.bias_type}")
    print(f" - split: {args.split}")
    print(f" - score_type: {args.score_type}")

    # Load the StereoSet model scores.
    results_path = os.path.join(
        args.persistent_dir, "results", "stereoset", "results.json"
    )
    with open(results_path, "r", encoding="utf-8") as f:
        results = json.load(f)

    records = []
    for experiment_id, experiment_results in results.items():
        (
            intrasentence_model,
            model_name_or_path,
            bias_type,
            score_type,
            split,
        ) = _parse_experiment_id(experiment_id)

        # Skip records we don't want to export.
        if split != args.split or score_type != args.score_type:
            continue
        # IDs without a bias-type segment (bias_type is None) are kept; their
        # scores are still looked up under the requested bias type below.
        if bias_type is not None and bias_type != args.bias_type:
            continue

        # Re-format the data.
        scores = experiment_results["intrasentence"][args.bias_type]
        records.append(
            {
                "experiment_id": experiment_id,
                "intrasentence_model": intrasentence_model,
                "model_name_or_path": model_name_or_path,
                "score_type": score_type,
                "split": split,
                "ss": scores["SS Score"],
                "lms": scores["LM Score"],
            }
        )

    if not records:
        # An empty record list yields a DataFrame without columns, so the
        # column selection below would fail with a KeyError.
        raise SystemExit(
            "No StereoSet results match the requested bias type, split and "
            "score type."
        )

    df = pd.DataFrame.from_records(records)
    print(df)

    df = df[["experiment_id", "ss", "lms"]]

    # NOTE(review): the large max_colwidth presumably keeps long experiment
    # IDs from being truncated in the LaTeX output; confirm.
    with pd.option_context("max_colwidth", 1000):
        print(df.to_latex(float_format="%.2f", index=False))
