% Findings of ACL 2025 paper. The url uses the public ACL Anthology host
% rather than a preview (staging) deployment.
% NOTE(review): no doi field is recorded; Anthology DOIs usually look like
% 10.18653/v1/<anthology-id> -- confirm against the publisher page before adding.
@inproceedings{wang-etal-2025-offline,
  author    = {Wang, Huaijie and Hao, Shibo and Dong, Hanze and Zhang, Shenao and Bao, Yilin and Yang, Ziran and Wu, Yi},
  title     = {Offline Reinforcement Learning for {LLM} Multi-step Reasoning},
  editor    = {Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {8881--8893},
  isbn      = {979-8-89176-256-5},
  url       = {https://aclanthology.org/2025.findings-acl.464/},
}