"""Tokenizer utilities for experiments."""

from transformers import AutoTokenizer


# Special-token roles mapped to their literal strings, laid out as the
# keyword dictionary accepted by a tokenizer's `add_special_tokens`.
special_tokens_dict = dict(
    sep_token='<sep>',
    cls_token='<cls>',
    mask_token='<mask>',
)

# Extra (non-special) token strings, in the list form accepted by a
# tokenizer's `add_tokens`.
new_tokens = [
    '<title>',
    '<top>',
    '<left>',
    '<data>',
    '<operator>',
    '<answer>',
]


def prepare_tokenizer(name, verbose=False):
    """Load the pretrained tokenizer registered under the (model) name.

    Args:
        name: Model identifier forwarded unchanged to
            `AutoTokenizer.from_pretrained`.
        verbose: When truthy, print which tokenizer was loaded and the
            status of the custom special tokens.

    Returns:
        The tokenizer object returned by `AutoTokenizer.from_pretrained`.
        No tokens are added to it by this function.
    """
    tokenizer = AutoTokenizer.from_pretrained(name)

    # NOTE: registration of the custom tokens is currently disabled, so the
    # tokenizer is returned exactly as loaded. To re-enable it, restore:
    # tokenizer.add_special_tokens(special_tokens_dict)
    # tokenizer.add_tokens(new_tokens)

    if verbose:
        print(f'[utils >> prep_tok] gets tokenizer from name [{name}]')
        # Report what actually happened: the previous message claimed the
        # special tokens were added even though the calls above are disabled.
        print(
            '[utils >> prep_tok] skips adding special tokens '
            f'{list(special_tokens_dict.keys())} (registration disabled)'
        )

    return tokenizer

