import os
from enum import Enum
from pathlib import Path
from typing import Dict, List

from pydantic import BaseModel, Field
from pydantic.functional_validators import AfterValidator
from typing_extensions import Annotated

# validator, conint, FilePath


class DatasetName(str, Enum):
    """String-valued enum of dataset identifiers.

    Every member's value is identical to its name. Because the enum also
    subclasses ``str``, members compare equal to their plain-string values.
    Used as the type of ``DataConfig.dataset_name``.
    """
    lima = "lima"
    stackexchange = "stackexchange"
    sharegpt = "sharegpt"
    fastchat_oasst = "fastchat_oasst"
    oasst = "oasst"
    oig = "oig"
    mulima = "mulima"
    own_stackexchange = "own_stackexchange"
    mt_bench = "mt_bench"
    mt_bench_ref_answers = "mt_bench_ref_answers"
    mt_bench_judge_prompts = "mt_bench_judge_prompts"
    dolly = "dolly"
    hh_rlhf = "hh_rlhf"
    bactrianx = "bactrianx"
    alpaca = "alpaca"
    xP3mt = "xP3mt"
    nectar = "nectar"


class LanguageCode(str, Enum):
    """String-valued enum of two-letter, upper-case language codes.

    Every member's value is identical to its name. Used for
    ``Translation.target_lang_codes`` and ``AppConfig.languages_per_dataset``.
    """
    DE = "DE"
    ES = "ES"
    IT = "IT"
    FR = "FR"
    EN = "EN"


class Translation(BaseModel):
    """Settings for the translation step.

    The API key defaults to the ``DEEPL_API_KEY`` environment variable. The
    variable is looked up when a ``Translation`` is built without an explicit
    ``api_key``, not when this module is imported, so importing the module
    never fails just because the variable is unset.
    """

    # Language codes to translate into.
    target_lang_codes: List[LanguageCode]
    # Resolved lazily via default_factory: an unset DEEPL_API_KEY only matters
    # when no api_key is supplied, and the environment is re-read on every
    # instantiation instead of being frozen at import time.
    api_key: str = Field(default_factory=lambda: os.environ["DEEPL_API_KEY"])


def to_existing_pathlib_path(paths: Dict) -> Dict:
    """Convert every value of *paths* to a ``pathlib.Path`` that must exist.

    Args:
        paths: Mapping from arbitrary keys to path-like values.

    Returns:
        A new dict with the same keys, in the same order, whose values are
        ``Path`` objects. The input mapping is not modified.

    Raises:
        AssertionError: If any path does not exist on the file system.
    """
    converted_paths = {}
    for key, value in paths.items():
        path = Path(value)
        if not path.exists():
            # Raised explicitly rather than through an ``assert`` statement:
            # asserts are stripped under ``python -O``, which would silently
            # disable this check. The exception type and message are kept so
            # existing callers (and pydantic's error conversion) see no change.
            raise AssertionError(f"File path {value} does not exist")
        converted_paths[key] = path
    return converted_paths


class DataConfig(BaseModel):
    """Selects a dataset and maps names to paths on disk."""
    # Which dataset to use; must be one of the DatasetName members.
    dataset_name: DatasetName
    # An open (untyped) dict is used here to allow arbitrary key names.
    # Values are post-processed by to_existing_pathlib_path, which converts
    # each one to a pathlib.Path and rejects paths that do not exist.
    dataset_name_to_path: Annotated[Dict, AfterValidator(to_existing_pathlib_path)] = {}


class AppConfig(BaseModel):
    """Top-level configuration combining data and translation settings."""
    # Dataset selection and dataset paths.
    data: DataConfig
    # Translation target languages and API key.
    translation: Translation
    # A list of language-code lists.
    # NOTE(review): presumably one inner list per dataset -- confirm against callers.
    languages_per_dataset: List[List[LanguageCode]]
