% ACL 2025 long paper proposing the Ordered CommonGen benchmark (see abstract).
% The url field uses the canonical ACL Anthology permalink, not a staging preview link.
@inproceedings{sakai-etal-2025-revisiting,
  author    = {Sakai, Yusuke and
               Kamigaito, Hidetaka and
               Watanabe, Taro},
  title     = {Revisiting Compositional Generalization Capability of Large Language Models Considering Instruction Following Ability},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {31219--31238},
  isbn      = {979-8-89176-251-0},
  url       = {https://aclanthology.org/2025.acl-long.1508/},
  abstract  = {In generative commonsense reasoning tasks such as CommonGen, generative large language models (LLMs) compose sentences that include all given concepts. However, when focusing on instruction-following capabilities, if a prompt specifies a concept order, LLMs must generate sentences that adhere to the specified order. To address this, we propose Ordered CommonGen, a benchmark designed to evaluate the compositional generalization and instruction-following abilities of LLMs. This benchmark measures ordered coverage to assess whether concepts are generated in the specified order, enabling a simultaneous evaluation of both abilities. We conducted a comprehensive analysis using 36 LLMs and found that, while LLMs generally understand the intent of instructions, biases toward specific concept order patterns often lead to low-diversity outputs or identical results even when the concept order is altered. Moreover, even the most instruction-compliant LLM achieved only about 75{\%} ordered coverage, highlighting the need for improvements in both instruction-following and compositional generalization capabilities.},
}
Markdown (Informal)
[Revisiting Compositional Generalization Capability of Large Language Models Considering Instruction Following Ability](https://aclanthology.org/2025.acl-long.1508/) (Sakai et al., ACL 2025)
ACL