% Conference paper, ACL 2026 main conference (Volume 1: Long Papers), cited under the
% ACL Anthology-style key gan-etal-2026-thinking.
% The source export pointed `url` at the Anthology staging site
% (preview.aclanthology.org/ingest-acl/); it now uses the canonical landing-page form
% built from the same Anthology ID, 2026.acl-long.2122.
% NOTE(review): confirm the canonical URL resolves once the ingest has been published.
% NOTE(review): `address` is kept as exported and presumably holds the conference
% location rather than the publisher's city -- confirm against the target style.
@inproceedings{gan-etal-2026-thinking,
  author    = {Gan, Siyuan and Liu, Jiaheng and Wang, Boyan and Yang, Tianpei and Miao, Runqing and Zhang, Yuyao and Meng, Fanyu and Feng, Junlan and Meng, Linjian and Huo, Jing and Gao, Yang},
  title     = {Thinking-Based Non-Thinking: Solving the Reward Hacking Problem in Training Hybrid Reasoning Models via Reinforcement Learning},
  editor    = {Liakata, Maria and Moreira, Viviane P. and Zhang, Jiajun and Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {45754--45771},
  isbn      = {979-8-89176-390-6},
  url       = {https://aclanthology.org/2026.acl-long.2122/},
}