% Conference paper record: Wang et al., ACL 2026 (Volume 1: Long Papers), pages 41482--41508.
% NOTE(review): the url field targets a preview/ingest host; confirm the permanent
%   address once the volume is published and update it (ideally alongside a doi).
% NOTE(review): address appears to hold the conference location rather than the
%   publisher's city; this matches the upstream export, so it is left untouched.
@inproceedings{wang-etal-2026-outcome,
  author    = {Wang, Binghai and Liu, Yantao and Liu, Yuxuan and Tang, Tianyi and Wang, Shenzhi and Gao, Chang and Zheng, Chujie and Zhang, Yichang and Yu, Le and Liu, Shixuan and Gui, Tao and Zhang, Qi and Huang, Xuanjing and Yu, Bowen and Huang, Fei and Lin, Junyang},
  title     = {Outcome Accuracy is Not Enough: Aligning the Reasoning Process of Reward Models},
  editor    = {Liakata, Maria and Moreira, Viviane P. and Zhang, Jiajun and Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
  pages     = {41482--41508},
  year      = {2026},
  month     = jul,
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-390-6},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1924/},
}