% INLG 2023 conference paper proposing NegBLEURT, a negation-aware variant of
% the BLEURT metric. The url field points at the canonical ACL Anthology record
% (not a temporary preview build); the subtitle's first word is brace-protected
% so sentence-casing styles keep its capital after the exclamation mark.
@inproceedings{anschutz-etal-2023-correct,
  title     = {This is not correct! {Negation-aware} Evaluation of Language Generation Systems},
  author    = {Ansch{\"u}tz, Miriam and
               Miguel Lozano, Diego and
               Groh, Georg},
  editor    = {Keet, C. Maria and
               Lee, Hung-Yi and
               Zarrie{\ss}, Sina},
  booktitle = {Proceedings of the 16th International Natural Language Generation Conference},
  month     = sep,
  year      = {2023},
  address   = {Prague, Czechia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.inlg-main.12/},
  doi       = {10.18653/v1/2023.inlg-main.12},
  pages     = {163--175},
  abstract  = {Large language models underestimate the impact of negations on how much they change the meaning of a sentence. Therefore, learned evaluation metrics based on these models are insensitive to negations. In this paper, we propose NegBLEURT, a negation-aware version of the BLEURT evaluation metric. For that, we designed a rule-based sentence negation tool and used it to create the CANNOT negation evaluation dataset. Based on this dataset, we fine-tuned a sentence transformer and an evaluation metric to improve their negation sensitivity. Evaluating these models on existing benchmarks shows that our fine-tuned models outperform existing metrics on the negated sentences by far while preserving their base models' performances on other perturbations.},
}
Markdown (Informal)
[This is not correct! Negation-aware Evaluation of Language Generation Systems](https://aclanthology.org/2023.inlg-main.12/) (Anschütz et al., INLG-SIGDIAL 2023)
ACL