@inproceedings{pradhan-todi-2023-understanding,
    title     = {Understanding Large Language Model Based Metrics for Text Summarization},
    author    = {Pradhan, Abhishek and
                 Todi, Ketan},
    editor    = {Deutsch, Daniel and
                 Dror, Rotem and
                 Eger, Steffen and
                 Gao, Yang and
                 Leiter, Christoph and
                 Opitz, Juri and
                 R{\"u}ckl{\'e}, Andreas},
    booktitle = {Proceedings of the 4th Workshop on Evaluation and Comparison of {NLP} Systems},
    month     = nov,
    year      = {2023},
    address   = {Bali, Indonesia},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2023.eval4nlp-1.12/},
    doi       = {10.18653/v1/2023.eval4nlp-1.12},
    pages     = {149--155},
    abstract  = {This paper compares the two most widely used techniques for evaluating generative tasks with large language models (LLMs): prompt-based evaluation and log-likelihood evaluation as part of the Eval4NLP shared task. We focus on the summarization task and evaluate both small and large LLM models. We also study the impact of LLAMA and LLAMA 2 on summarization, using the same set of prompts and techniques. We used the Eval4NLP dataset for our comparison. This study provides evidence of the advantages of prompt-based evaluation techniques over log-likelihood based techniques, especially for large models and models with better reasoning power.},
}
Markdown (Informal)
[Understanding Large Language Model Based Metrics for Text Summarization](https://aclanthology.org/2023.eval4nlp-1.12/) (Pradhan & Todi, Eval4NLP 2023)
ACL