% VRPO paper, ACL 2026 main proceedings (Volume 1: Long Papers).
% The url field uses the canonical ACL Anthology address built from the
% Anthology ID 2026.acl-long.1103, not a temporary ingest-preview address.
% Acronyms in the title and proper nouns in the booktitle are brace-protected
% as whole words so that sentence-casing styles keep their capitalisation.
@inproceedings{zhu-etal-2026-vrpo,
  title     = {{VRPO}: Rethinking Value Modeling for Robust {RL} under Noisy Supervision in {LLM} Post-Training},
  author    = {Zhu, Dingwei and
               Dou, Shihan and
               Xi, Zhiheng and
               Jin, Senjie and
               Zhang, Guoqiang and
               Zhang, Jiazheng and
               Ye, Junjie and
               Chai, Mingxu and
               Zhou, Enyu and
               Zhang, Ming and
               Wang, Yuhui and
               Huang, Caishuang and
               Huang, Chenhao and
               Zhang, Yunke and
               Wang, Yuran and
               Gui, Tao and
               Zhang, Qi and
               Qiu, Xipeng and
               Huang, Xuanjing},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-long.1103/},
  pages     = {24046--24067},
  isbn      = {979-8-89176-390-6},
}