import argparse
import os
import glob
from pathlib import Path
import json

from tqdm import tqdm
import pandas as pd

# Directory containing this script; used to derive the default persistent_dir.
thisdir = os.path.dirname(os.path.realpath(__file__))

# Command-line interface. The description previously said "SEAT", but this
# script exports GLUE results (see the --checkpoint_dir help text).
parser = argparse.ArgumentParser(description="Export GLUE results.")
parser.add_argument(
    "--persistent_dir",
    action="store",
    # Defaults to the parent of the directory this script lives in.
    default=os.path.realpath(os.path.join(thisdir, "..")),
    type=str,
    help="Directory where all persistent data will be stored.",
)
parser.add_argument(
    "--checkpoint_dir",
    action="store",
    type=str,
    required=True,
    help="Directory where GLUE evaluation results files are.",
)


# Name of the metric to read from the evaluation results for each GLUE task.
# Accuracy is the default; the tasks scored differently are overridden below
# (updating existing keys keeps the original task order).
metric_mapping = dict.fromkeys(
    ("cola", "mnli", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"),
    "eval_accuracy",
)
metric_mapping.update(
    cola="eval_matthews_correlation",
    mrpc="eval_f1",
    stsb="eval_pearson",
)

if __name__ == "__main__":
    # Script entry point: find every eval_results.json under --checkpoint_dir,
    # read the metric configured for its task in metric_mapping, and write one
    # row per result file to glue.csv.
    args = parser.parse_args()

    print("Exporting GLUE results:")
    print(f" - persistent_dir: {args.persistent_dir}")
    print(f" - checkpoint_dir: {args.checkpoint_dir}")

    # Escape the user-supplied directory so glob metacharacters in its name
    # ("[", "*", "?") are matched literally instead of being treated as a
    # pattern. Sort the matches so the row order of the exported CSV does not
    # depend on filesystem enumeration order.
    result_files = sorted(
        glob.glob(
            f"{glob.escape(args.checkpoint_dir)}/**/eval_results.json",
            recursive=True,
        )
    )

    records = []
    for result_file in tqdm(result_files, desc="Parsing GLUE results"):
        # The three directories directly above the file are read as
        # <experiment_id>/<seed>/<task_name>/eval_results.json.
        parts = Path(result_file).parts
        if len(parts) < 4:
            raise ValueError(
                f"Cannot infer experiment ID, seed and task from path: {result_file}"
            )
        experiment_id, seed, task_name = parts[-4:-1]

        eval_metric = metric_mapping.get(task_name)
        if eval_metric is None:
            # Still a KeyError, as before, but now naming the offending file.
            raise KeyError(
                f"Unknown task {task_name!r} inferred from path: {result_file}"
            )

        # JSON is read as UTF-8 rather than the platform's locale encoding.
        with open(result_file, "r", encoding="utf-8") as f:
            results = json.load(f)

        if eval_metric not in results:
            raise KeyError(
                f"Metric {eval_metric!r} missing from results file: {result_file}"
            )

        records.append(
            {
                "experiment_id": experiment_id,
                "seed": seed,
                "task_name": task_name,
                "score": results[eval_metric],
            }
        )

    df = pd.DataFrame.from_records(records)
    # NOTE: the CSV is written relative to the current working directory;
    # args.persistent_dir is only echoed above and is not used for the output.
    df.to_csv("glue.csv")
    print(df)
