% UniMoE-Audio conference paper (ACL 2026, Volume 1: Long Papers).
% Only the model name and proper nouns are brace-protected, as whole words,
% so sentence-casing styles keep their capitals while the rest of the title
% stays under style control.
% NOTE(review): the url points at a preview/ingest host; confirm and switch to
% the permanent address (and add a doi) once the proceedings are published.
@inproceedings{liu-etal-2026-unimoe,
  title     = {{UniMoE}-Audio: Unified Speech and Music Generation with Dynamic-Capacity Mixture-of-Experts},
  author    = {Liu, Zhenyu and
               Li, Yunxin and
               Zhang, Xuanyu and
               Teng, Qixun and
               Jiang, Shenyuan and
               Chen, Xinyu and
               Shi, Haoyuan and
               Chen, Haolan and
               Meng, Fanbo and
               Zhao, Mingjun and
               Xu, Yu and
               He, Yancheng and
               Hu, Baotian and
               Li, Haizhou and
               Zhang, Min},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.412/},
  pages     = {9107--9119},
  isbn      = {979-8-89176-390-6},
}