% Zhu et al. (2026): conference paper in the ACL 2026 long-papers proceedings
% (Volume 1), pages 46167--46192.
% NOTE(review): the url field points at a preview.aclanthology.org/ingest-acl
% host; confirm the final public URL before relying on it.
@inproceedings{zhu-etal-2026-data,
  author    = {Zhu, Erle and Jiang, Dazhi and Wang, Yuan and Li, Xujun and Cheng, Jiale and Gu, Yuxian and Niu, Yilin and Zeng, Aohan and Tang, Jie and Huang, Minlie and Wang, Hongning},
  title     = {Data Efficient {RLVR} via Off-Policy Influence Guidance},
  editor    = {Liakata, Maria and Moreira, Viviane P. and Zhang, Jiajun and Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {46167--46192},
  isbn      = {979-8-89176-390-6},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.2141/},
}