% ACL 2026 long paper (Volume 1), ACL Anthology ID 2026.acl-long.1697.
% The url field uses the canonical Anthology address built from that ID.
@inproceedings{gong-etal-2026-probing,
    title     = {Probing Audio-Visual Reasoning in Multimodal Language Models through the Lens of Audio},
    author    = {Gong, Kaixiong and
                 Feng, Kaituo and
                 Li, Bohao and
                 Wang, Yibing and
                 Cheng, Mofan and
                 Yang, Shijia and
                 Han, Jiaming and
                 Wang, Benyou and
                 Bai, Yutong and
                 Yang, Zhuoran and
                 Yue, Xiangyu},
    editor    = {Liakata, Maria and
                 Moreira, Viviane P. and
                 Zhang, Jiajun and
                 Jurgens, David},
    booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.acl-long.1697/},
    pages     = {36603--36645},
    isbn      = {979-8-89176-390-6},
}