% Conference paper: MM-Doc-R1, Findings of ACL 2026, pages 29770--29783.
% The system name in the title is braced as one unit so that sentence-casing
% styles keep its capitalisation intact.
% NOTE(review): url points at a preview.aclanthology.org ingest host; this is
%   presumably a staging link -- confirm the canonical URL once published.
% NOTE(review): address appears to hold the conference venue (ACL Anthology
%   export convention) rather than the publisher's city -- confirm before
%   using with styles that print it next to the publisher.
@inproceedings{lin-etal-2026-mm,
  author    = {Lin, Jiahang and
               Hu, Kai and
               Wang, Binghai and
               Zhou, Yuhao and
               Xi, Zhiheng and
               Guo, Honglin and
               Liu, Shichun and
               Wang, Junzhe and
               Dou, Shihan and
               Zhou, Enyu and
               Yan, Hang and
               Han, Zhenhua and
               Gui, Tao and
               Zhang, Qi and
               Huang, Xuanjing},
  title     = {{MM-Doc-R1}: Training Agents for Long Document Visual Question Answering through Multi-turn Reinforcement Learning},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {29770--29783},
  isbn      = {979-8-89176-395-1},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1488/},
}