@inproceedings{kim-etal-2025-learning-insert,
title = "Learning to Insert [{PAUSE}] Tokens for Better Reasoning",
author = "Kim, Eunki and
Kim, Sangryul and
Thorne, James",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/landing_page/2025.findings-acl.1217/",
pages = "23760--23777",
ISBN = "979-8-89176-256-5",
abstract = "To enhance reasoning capabilities, previous works have explored incorporating special-purpose tokens into the training process. These strategies strengthen the learning mechanism of transformer-based large language models (LLMs). Building on prior research, in which inserting dummy tokens consecutively just before reasoning steps can enhance effectiveness, we introduce a novel approach termed $\textbf{D}$ynamic $\textbf{I}$nserting Tokens $\textbf{T}$raining $\textbf{(DIT)}$. Our method identifies positions within sequences where model confidence is lowest according to token log-likelihood. Strategically inserting [PAUSE] tokens on these positions bolsters the model{'}s predictive capabilities for subsequent tokens. Experimental results across diverse datasets and models, from the 2.7B model to the 8B model, demonstrate that DIT consistently outperforms traditional fine-tuning and previous token insertion methods. With this simple yet effective method, we achieve accuracy gains of up to 4.7{\%}p on GSM8K, 3.23{\%}p on AQUA-RAT, and pass@1 improvements of up to 3.4{\%}p on MBPP datasets. Our work shows a model-based, dynamic approach rather than a heuristic one, thereby broadening the scope of research in reasoning."
}
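
The abstract outlines how DIT works: score each position in a training sequence by the log-likelihood the model assigns to the observed token, then insert [PAUSE] tokens where that confidence is lowest. The snippet below is a minimal, hypothetical PyTorch sketch of that idea for a single sequence, assuming precomputed logits, a pause_id taken from the tokenizer's vocabulary, and a fixed insertion budget k; it is an illustration of the described mechanism, not the authors' released implementation.

import torch

def insert_pause_tokens(input_ids, logits, pause_id, k=3):
    # Score each position by the log-likelihood the model assigned to the
    # observed next token; low values mark low-confidence positions.
    log_probs = torch.log_softmax(logits[:-1], dim=-1)                    # (T-1, vocab)
    token_ll = log_probs.gather(1, input_ids[1:].unsqueeze(1)).squeeze(1)  # (T-1,)

    # Indices (into the original sequence) of the k least-confident tokens.
    low_conf = (torch.topk(-token_ll, min(k, token_ll.numel())).indices + 1).tolist()

    # Rebuild the sequence with a [PAUSE] token inserted before each such token.
    out = []
    for i, tok in enumerate(input_ids.tolist()):
        if i in low_conf:
            out.append(pause_id)
        out.append(tok)
    return torch.tensor(out, dtype=input_ids.dtype)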