% Workshop paper in the RepL4NLP-2025 proceedings (ACL Anthology ID 2025.repl4nlp-1.4).
% The url field uses the permanent aclanthology.org address for that ID; the
% preview.aclanthology.org host only serves temporary staging builds.
@inproceedings{mao-etal-2025-cross,
  author    = {Mao, Zhuoyuan and Zhao, Mengjie and Wu, Qiyu and Zhong, Zhi and Liao, Wei-Hsiang and Wakaki, Hiromi and Mitsufuji, Yuki},
  title     = {Cross-Modal Learning for Music-to-Music-Video Description Generation},
  editor    = {Adlakha, Vaibhav and Chronopoulou, Alexandra and Li, Xiang Lorraine and Majumder, Bodhisattwa Prasad and Shi, Freda and Vernikos, Giorgos},
  booktitle = {Proceedings of the 10th Workshop on Representation Learning for {NLP} ({RepL4NLP-2025})},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, NM},
  publisher = {Association for Computational Linguistics},
  pages     = {51--58},
  isbn      = {979-8-89176-245-9},
  url       = {https://aclanthology.org/2025.repl4nlp-1.4/},
}