% Conference paper in the ACL 2025 main proceedings (Volume 1: Long Papers).
% The url field uses the permanent ACL Anthology address for Anthology ID
% 2025.acl-long.735 rather than a temporary preview/ingestion-branch link.
@inproceedings{fang-etal-2025-cart,
  author    = {Fang, Minghui and Ji, Shengpeng and Zuo, Jialong and Huang, Hai and Xia, Yan and Zhu, Jieming and Cheng, Xize and Yang, Xiaoda and Liu, Wenrui and Wang, Gang and Dong, Zhenhua and Zhao, Zhou},
  title     = {{CART}: A Generative Cross-Modal Retrieval Framework With Coarse-To-Fine Semantic Modeling},
  editor    = {Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {15120--15133},
  isbn      = {979-8-89176-251-0},
  url       = {https://aclanthology.org/2025.acl-long.735/},
}