@inproceedings{larionov-eger-2025-promptoptme,
    title = "{P}rompt{O}pt{M}e: Error-Aware Prompt Compression for {LLM}-based {MT} Evaluation Metrics",
    author = "Larionov, Daniil and
      Eger, Steffen",
    editor = "Chiruzzo, Luis and
      Ritter, Alan and
      Wang, Lu",
    booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
    month = apr,
    year = "2025",
    address = "Albuquerque, New Mexico",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.naacl-long.592/",
    pages = "11807--11820",
    isbn = "979-8-89176-189-6",
    abstract = "Evaluating the quality of machine-generated natural language content is a challenging task in Natural Language Processing (NLP). Recently, large language models (LLMs) like GPT-4 have been employed for this purpose, but they are computationally expensive due to the extensive token usage required by complex evaluation prompts. In this paper, we propose a prompt optimization approach that uses a smaller, fine-tuned language model to compress input data for evaluation prompt, thus reducing token usage and computational cost when using larger LLMs for downstream evaluation. Our method involves a two-stage fine-tuning process: supervised fine-tuning followed by preference optimization to refine the model's outputs based on human preferences. We focus on Machine Translation (MT) evaluation and utilize the GEMBA-MQM metric as a starting point. Our results show a $2.37\times$ reduction in token usage without any loss in evaluation quality. This work makes state-of-the-art LLM-based metrics like GEMBA-MQM more cost-effective and efficient, enhancing their accessibility for broader use."
}
Markdown (Informal)
[PromptOptMe: Error-Aware Prompt Compression for LLM-based MT Evaluation Metrics](https://aclanthology.org/2025.naacl-long.592/) (Larionov & Eger, NAACL 2025)
ACL