import numpy as np
from Levenshtein import distance
import nltk.translate.chrf_score as chrf_score


def get_metric_dict(old_string, new_string, tokenizer):
    """Return a dictionary containing the perturbation metrics of a specific
    transformation.

    Keyword arguments:
    old_string -- original string
    new_string -- perturbed string
    tokenizer  -- roberta-base tokenizer to measure the compression rate
                  (must expose ``encode_plus``)

    Returned keys:
    levenshtein_distance_normalized -- edit distance between old_string and
        new_string divided by len(old_string) (divisor floored at 1)
    compression_rate -- len(old_string) divided by the number of token ids
        the tokenizer produces for new_string (divisor floored at 1)
    chrf_2 -- chrF score restricted to character n-grams of order 2, with
        old_string as the reference and new_string as the hypothesis
    """

    # Floor the divisor at 1 so an empty original string does not raise.
    levenshtein_distance_normalized = (
        distance(old_string, new_string) / max(1, len(old_string))
    )

    # Both strings are handed over as whitespace-split token lists;
    # whitespace is kept as part of the n-grams (ignore_whitespace=False).
    chrf_2 = chrf_score.sentence_chrf(
        old_string.split(),
        new_string.split(),
        min_len=2,
        max_len=2,
        ignore_whitespace=False,
    )

    # Number of token ids for the perturbed string; [0] selects the single
    # sequence out of the batch dimension added by return_tensors="pt".
    encoding = tokenizer.encode_plus(
        new_string,
        padding=False,
        truncation=False,
        return_tensors="pt",
    )
    encoded_length = len(encoding.input_ids[0])

    # NOTE(review): the numerator is the length of the ORIGINAL string while
    # the token count comes from the PERTURBED string -- confirm this mix is
    # intended.
    # The divisor is floored at 1, mirroring the Levenshtein normalisation
    # above, so a tokenizer that emits no ids (e.g. empty input without
    # special tokens) cannot trigger a ZeroDivisionError.
    compression_rate = len(old_string) / max(1, encoded_length)

    return {
        "levenshtein_distance_normalized": levenshtein_distance_normalized,
        "compression_rate": compression_rate,
        "chrf_2": chrf_2,
    }
