@inproceedings{zhang-etal-2025-diversification,
title = "Diversification Catalyzes Language Models' Instruction Generalization To Unseen Semantics",
author = "Zhang, Dylan and
Wang, Justin and
Charton, Francois",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingestion-acl-25/2025.findings-acl.1193/",
pages = "23236--23249",
ISBN = "979-8-89176-256-5",
abstract = "Instruction-tuned language models excel in knowledge, reasoning, and instruction-following. While knowledge and reasoning are well-explored, the factors enabling generalization to unseen instructions remain underexplored due to challenges in isolating instruction-following dynamics.In this work, we model instruction-following as a computational process and design controlled experiments inspired by the Turing-complete Markov algorithm to disentangle its dynamics. Our findings reveal that the ability to generalize to instructions with unseen semantics emerges only when training data is strategically diversified across rich semantics. This finding gives us the hammer that breaks down the wall separating training instructions from unseen ones encountered in the wild. For specialist models, a balanced mix of in-domain and diverse out-of-domain tasks enhances performance more effectively than simply increasing in-domain data. For generalist models, domain diversification consistently outweighs the costs of reduced task-specific data, regardless of data budgets. Furthermore, we show that proper diversification with a lower data budget can outperform simply scaling up data volume. These findings highlight strategic data diversification as key to optimizing instruction-following and improving model performance across applications."
}