% UniSonate paper, ACL 2026 main proceedings (Volume 1: Long Papers).
% Case-sensitive words are protected with whole-word braces so that
% sentence-casing styles keep them intact without mid-word brace groups.
% NOTE(review): the url below uses a preview.aclanthology.org ingest path,
% presumably a staging link; confirm the canonical Anthology URL before release.
@inproceedings{qiang-etal-2026-unisonate,
  author    = {Qiang, Chunyu and
               Wang, Xiaopeng and
               Yin, Kang and
               Liang, Yuzhe and
               Guo, Yuxin and
               Ma, Teng and
               Zhang, Ziyu and
               Wang, Tianrui and
               Gong, Cheng and
               Chen, Yushen and
               Fu, Ruibo and
               Wang, Longbiao and
               Dang, Jianwu},
  title     = {{UniSonate}: A Unified Model for Speech, Music, and Sound Effect Generation with Text Instructions},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {28043--28054},
  isbn      = {979-8-89176-390-6},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1293/},
}