@inproceedings{kartac-etal-2025-openlgauge,
title = "{O}pe{NLG}auge: An Explainable Metric for {NLG} Evaluation with Open-Weights {LLM}s",
author = "Kartac, Ivan and
Lango, Mateusz and
Dusek, Ondrej",
editor = "Flek, Lucie and
Narayan, Shashi and
Phương, Lê Hồng and
Pei, Jiahuan",
booktitle = "Proceedings of the 18th International Natural Language Generation Conference",
month = oct,
year = "2025",
address = "Hanoi, Vietnam",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-lei-gao-usc/2025.inlg-main.19/",
pages = "292--337",
abstract = "Large Language Models (LLMs) have demonstrated great potential as evaluators of NLG systems, allowing for high-quality, reference-free, and multi-aspect assessments. However, existing LLM-based metrics suffer from two major drawbacks: reliance on proprietary models to generate training data or perform evaluations, and a lack of fine-grained, explanatory feedback. We introduce OpeNLGauge, a fully open-source, reference-free NLG evaluation metric that provides accurate explanations based on individual error spans. OpeNLGauge is available as a two-stage ensemble of larger open-weight LLMs, or as a small fine-tuned evaluation model, with confirmed generalizability to unseen tasks, domains and aspects. Our extensive meta-evaluation shows that OpeNLGauge achieves competitive correlation with human judgments, outperforming state-of-the-art models on certain tasks while maintaining full reproducibility and providing explanations more than twice as accurate."
}

Markdown (Informal):

[OpeNLGauge: An Explainable Metric for NLG Evaluation with Open-Weights LLMs](https://aclanthology.org/2025.inlg-main.19/) (Kartac et al., INLG 2025)