% StyleTTS-ZS, NAACL 2025 (Volume 1: Long Papers), pp. 4725--4744.
% The url field uses the canonical ACL Anthology address for paper ID
% 2025.naacl-long.242 (same ID as the DOI suffix) instead of a preview-branch link.
@inproceedings{li-etal-2025-styletts,
    title     = {{StyleTTS-ZS}: Efficient High-Quality Zero-Shot Text-to-Speech Synthesis with Distilled Time-Varying Style Diffusion},
    author    = {Li, Yinghao Aaron and
                 Jiang, Xilin and
                 Han, Cong and
                 Mesgarani, Nima},
    editor    = {Chiruzzo, Luis and
                 Ritter, Alan and
                 Wang, Lu},
    booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
    month     = apr,
    year      = {2025},
    address   = {Albuquerque, New Mexico},
    publisher = {Association for Computational Linguistics},
    pages     = {4725--4744},
    doi       = {10.18653/v1/2025.naacl-long.242},
    url       = {https://aclanthology.org/2025.naacl-long.242/},
    isbn      = {979-8-89176-189-6},
}