% ACL 2026 long paper (Volume 1: Long Papers); ACL Anthology ID 2026.acl-long.1271.
@inproceedings{chen-etal-2026-efficient-hyperparameter,
  author    = {Chen, Minping and
               Xiao, Bowen and
               Liang, Du and
               Zeng, Chuxuan and
               Wen, Zeyi},
  title     = {Efficient Hyperparameter Optimization for {LLM} Reinforcement Learning},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {27540--27552},
  isbn      = {979-8-89176-390-6},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1271/},
  abstract  = {Hyperparameters are critical to LLM reinforcement learning (RL), but existing hyperparameter optimization (HPO) methods remain inefficient in this area, due to the massive model scale and resource-intensive training cycles. In this paper, we propose Joint Fidelity Hyperparameter Optimization (JF-HPO), which simultaneously adapts both model size and training budget as fidelity. JF-HPO is empowered by: (i) a small proxy model of the target LLM for efficient training and evaluation in each HPO trial; (ii) several carefully designed early-stopping strategies based on training dynamics; (iii) an efficient checkpointing mechanism to eliminate redundant computations. JF-HPO significantly improves the computational efficiency of each trial (up to 14.9$\times$) compared with existing HPO methods, thus achieving better predictive accuracy in most cases under the same time budget. Notably, JF-HPO delivers performance improvements ranging from 5.8{\%} to 111.6{\%} over VeRL Recipe.},
}
Markdown (Informal)
[Efficient Hyperparameter Optimization for LLM Reinforcement Learning](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1271/) (Chen et al., ACL 2026)
ACL
- Minping Chen, Bowen Xiao, Du Liang, Chuxuan Zeng, and Zeyi Wen. 2026. Efficient Hyperparameter Optimization for LLM Reinforcement Learning. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 27540–27552, San Diego, California, United States. Association for Computational Linguistics.