% Conference paper (LREC 2026). Normalized from an ACL Anthology preview export:
% retyped @article -> @inproceedings, moved the conference name from `journal`
% to `booktitle`, dropped the bogus volume slug, and canonicalized the URL.
@inproceedings{nishida-etal-2026-instructsum,
    title = "{InstructSum}: A Benchmark to Evaluate Instruction-Following Capability of Large Language Models in Summarization",
    author = "Nishida, Kosuke and
      Nishida, Kyosuke and
      Saito, Itsumi",
    editor = "Piperidis, Stelios and
      Bel, N{\'u}ria and
      van den Heuvel, Henk and
      Ide, Nancy and
      Krek, Simon and
      Toral, Antonio",
    booktitle = "Proceedings of the International Conference on Language Resources and Evaluation ({LREC} 2026)",
    month = may,
    year = "2026",
    address = "Palma de Mallorca, Spain",
    publisher = "ELRA Language Resource Association",
    url = "https://aclanthology.org/2026.lrec-main.779/",
    pages = "9940--9952",
    abstract = "Pre-trained large language models (LLMs) align their outputs with user intent through natural language instructions. In the summarization task, conciseness of the output is inherently required, which makes the instruction-following capability of LLMs particularly important. That is, providing supplementary information beyond the instruction can be undesirable. In this study, we introduce a novel benchmark, InstructSum, consisting of 3,309 types of instructions to evaluate the instruction-following capability in the summarization task. InstructSum has multiple instructions per source text, and thus it enables the evaluation of how LLMs adjust the content of the summary according to the instructions. Our experiments with six LLM families revealed the challenges that LLMs face in this task. For example, LLMs provide polite and helpful responses with irrelevant information; they go beyond instructions and fail to respond with a concise summary."
}
[InstructSum: A Benchmark to Evaluate Instruction-Following Capability of Large Language Models in Summarization](https://aclanthology.org/2026.lrec-main.779/) (Nishida et al., LREC 2026)
ACL