% Conference paper in the ACL 2026 proceedings (Volume 1: Long Papers).
% NOTE(review): the author "Wujiajia" is recorded as a single name token; the
% family/given split is not stated in this record, so it is left untouched --
% confirm against the published paper.
% The url field uses the canonical Anthology address instead of the temporary
% preview/ingest staging path.
@inproceedings{li-etal-2026-reinforcement,
  author    = {Li, Siheng
               and Li, Kejiao
               and Xu, Zenan
               and Huang, Guanhua
               and Li, Kun
               and Wu, Haoyuan
               and Wujiajia
               and Zheng, Zihao
               and Zhang, Chenchen
               and Shi, Kun
               and Gong, Xue
               and Yi, Qi
               and Xiong, Ruibin
               and Xu, Tingqiang
               and Jiang, Yuhao
               and Yan, Jianfeng
               and Zeng, Yuyuan
               and Xu, Guanghui
               and Xue, Jinbao
               and Xu, Zhijiang
               and Fang, Zheng
               and Li, Shuai
               and Liu, Qibin
               and Li, Xiaoxue
               and Li, Zhuoyu
               and Tao, Yangyu
               and Gao, Fei
               and Jiang, Cheng
               and Wang, Bochao
               and Liu, Kai
               and Zhu, Jianchen
               and Lam, Wai
               and Zhou, Bo
               and Wang, Di},
  title     = {Reinforcement Learning on Pre-Training Data},
  editor    = {Liakata, Maria
               and Moreira, Viviane P.
               and Zhang, Jiajun
               and Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {11046--11057},
  isbn      = {979-8-89176-390-6},
  url       = {https://aclanthology.org/2026.acl-long.506/},
}