% UniMoE-Audio (Liu et al.): long paper in the ACL 2026 main proceedings,
% Volume 1, pages 9107--9119.
% The model name in the title is braced as one unit so that sentence-casing
% styles keep its capitalisation intact.
% NOTE(review): the url below points at a preview.aclanthology.org ingest
% build, which looks like a staging address -- confirm, and replace it with
% the permanent link once that is available.
@inproceedings{liu-etal-2026-unimoe,
    title = "{UniMoE-Audio}: Unified Speech and Music Generation with Dynamic-Capacity Mixture-of-Experts",
    author = "Liu, Zhenyu  and
      Li, Yunxin  and
      Zhang, Xuanyu  and
      Teng, Qixun  and
      Jiang, Shenyuan  and
      Chen, Xinyu  and
      Shi, Haoyuan  and
      Chen, Haolan  and
      Meng, Fanbo  and
      Zhao, Mingjun  and
      Xu, Yu  and
      He, Yancheng  and
      Hu, Baotian  and
      Li, Haizhou  and
      Zhang, Min",
    editor = "Liakata, Maria  and
      Moreira, Viviane P.  and
      Zhang, Jiajun  and
      Jurgens, David",
    booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl/2026.acl-long.412/",
    pages = "9107--9119",
    ISBN = "979-8-89176-390-6"
}