% NAACL 2025 long paper (ACL Anthology ID 2025.naacl-long.74).
% The url field is the canonical Anthology landing page, not the staging "preview" host.
@inproceedings{liu-etal-2025-detect,
    title     = {Detect, Disambiguate, and Translate: On-Demand Visual Reasoning for Multimodal Machine Translation with Large Vision-Language Models},
    author    = {Liu, Danyang and
                 Kong, Fanjie and
                 Sun, Xiaohang and
                 Patil, Dhruva and
                 Vajpayee, Avijit and
                 Liu, Zhu and
                 Bhat, Vimal and
                 Sadoughi, Najmeh},
    editor    = {Chiruzzo, Luis and
                 Ritter, Alan and
                 Wang, Lu},
    booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
    month     = apr,
    year      = {2025},
    address   = {Albuquerque, New Mexico},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.naacl-long.74/},
    pages     = {1559--1570},
    isbn      = {979-8-89176-189-6}
}