@inproceedings{xuanfan-piji-2023-systematic,
title = "A Systematic Evaluation of Large Language Models for Natural Language Generation Tasks",
author = "Xuanfan, Ni and
Piji, Li",
editor = "Zhang, Jiajun",
booktitle = "Proceedings of the 22nd Chinese National Conference on Computational Linguistics (Volume 2: Frontier Forum)",
month = aug,
year = "2023",
address = "Harbin, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2023.ccl-2.4/",
pages = "40--56",
language = "eng",
abstract = "Recent efforts have evaluated large language models (LLMs) in areas such as commonsense reasoning, mathematical reasoning, and code generation. However, to the best of our knowledge, no work has specifically investigated the performance of LLMs in natural language generation (NLG) tasks, a pivotal criterion for determining model excellence. Thus, this paper conducts a comprehensive evaluation of well-known and high-performing LLMs, namely ChatGPT, ChatGLM, T5-based models, LLaMA-based models, and Pythia-based models, in the context of NLG tasks. We select English and Chinese datasets encompassing Dialogue Generation and Text Summarization. Moreover, we propose a common evaluation setting that incorporates input templates and post-processing strategies. Our study reports automatic results, accompanied by a detailed analysis."
}
[A Systematic Evaluation of Large Language Models for Natural Language Generation Tasks](https://aclanthology.org/2023.ccl-2.4/) (Ni & Li, CCL 2023)