@inproceedings{xiong-zhao-2025-giift,
title = "{GIIFT}: Graph-guided Inductive Image-free Multimodal Machine Translation",
author = "Xiong, Jiafeng and
Zhao, Yuting",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Tenth Conference on Machine Translation",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.wmt-1.6/",
pages = "98--112",
ISBN = "979-8-89176-341-8",
abstract = "Multimodal Machine Translation (MMT) has demonstrated the significant help of visual information in machine translation. However, existing MMT methods face challenges in leveraging the modality gap by enforcing rigid visual-linguistic alignment whilst being confined to inference within their trained multimodal domains. In this work, we construct novel multimodal scene graphs to preserve and integrate modality-specific information and introduce GIIFT, a two-stage Graph-guided Inductive Image-Free MMT framework that uses a cross-modal Graph Attention Network adapter to learn multimodal knowledge in a unified fused space and inductively generalize it to broader image-free translation domains. Experimental results on the Multi30K dataset of English-to-French and English-to-German tasks demonstrate that our GIIFT surpasses existing approaches and achieves the state-of-the-art, even without images during inference. Results on the WMT benchmark show significant improvements over the image-free translation baselines, demonstrating the strength of GIIFT towards inductive image-free inference."
}