% Conference paper in the ACL 2026 main proceedings (Volume 1: Long Papers), pages 36603--36645.
% NOTE(review): the url field targets a preview.aclanthology.org "ingest-acl" branch, which looks
% like a staging link -- confirm the canonical address before relying on it.
@inproceedings{gong-etal-2026-probing,
  author    = {Gong, Kaixiong and Feng, Kaituo and Li, Bohao and Wang, Yibing and Cheng, Mofan and Yang, Shijia and Han, Jiaming and Wang, Benyou and Bai, Yutong and Yang, Zhuoran and Yue, Xiangyu},
  title     = {Probing Audio-Visual Reasoning in Multimodal Language Models through the Lens of Audio},
  editor    = {Liakata, Maria and Moreira, Viviane P. and Zhang, Jiajun and Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {36603--36645},
  isbn      = {979-8-89176-390-6},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1697/},
}