% Workshop paper in the RepL4NLP-2025 proceedings (ACL Anthology ID 2025.repl4nlp-1.4).
% The url field uses the canonical aclanthology.org address rather than a
% temporary preview/ingest-branch host, so the link stays valid over time.
@inproceedings{mao-etal-2025-cross,
    title     = {Cross-Modal Learning for Music-to-Music-Video Description Generation},
    author    = {Mao, Zhuoyuan and
                 Zhao, Mengjie and
                 Wu, Qiyu and
                 Zhong, Zhi and
                 Liao, Wei-Hsiang and
                 Wakaki, Hiromi and
                 Mitsufuji, Yuki},
    editor    = {Adlakha, Vaibhav and
                 Chronopoulou, Alexandra and
                 Li, Xiang Lorraine and
                 Majumder, Bodhisattwa Prasad and
                 Shi, Freda and
                 Vernikos, Giorgos},
    booktitle = {Proceedings of the 10th Workshop on Representation Learning for {NLP} ({RepL4NLP-2025})},
    month     = may,
    year      = {2025},
    address   = {Albuquerque, NM},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.repl4nlp-1.4/},
    pages     = {51--58},
    isbn      = {979-8-89176-245-9},
}