@inproceedings{tang-etal-2022-multimodal,
title = "Multimodal Neural Machine Translation with Search Engine Based Image Retrieval",
author = "Tang, ZhenHao and
Zhang, XiaoBing and
Long, Zi and
Fu, XiangHua",
booktitle = "Proceedings of the 9th Workshop on Asian Translation",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Conference on Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2022.wat-1.11/",
pages = "89--98",
    abstract = "Recently, a number of works have shown that the performance of neural machine translation (NMT) can be improved to a certain extent by using visual information. However, most of these conclusions are drawn from the analysis of experimental results based on a limited set of bilingual sentence-image pairs, such as Multi30K. In these kinds of datasets, the content of one bilingual parallel sentence pair must be well represented by a manually annotated image, which differs from the actual translation situation. We propose an open-vocabulary image retrieval method to collect descriptive images for a bilingual parallel corpus using an image search engine, and a text-aware attentive visual encoder to filter incorrectly collected noise images. Experimental results on Multi30K and two other translation datasets show that our proposed method achieves significant improvements over strong baselines."
}
Markdown (Informal)
[Multimodal Neural Machine Translation with Search Engine Based Image Retrieval](https://preview.aclanthology.org/add-emnlp-2024-awards/2022.wat-1.11/) (Tang et al., WAT 2022)
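The abstract describes a text-aware attentive visual encoder that downweights noisy images retrieved from a search engine. Below is a minimal, hypothetical PyTorch sketch of what text-conditioned attention over retrieved image features could look like; the class name, additive scoring function, and dimensions are illustrative assumptions, not the paper's actual architecture.

```python
# Hypothetical sketch (not from the paper): the source-sentence representation
# scores each retrieved image feature, and a softmax-weighted sum downweights
# noisy (irrelevant) images before fusion into the translation model.
import torch
import torch.nn as nn


class TextAwareVisualEncoder(nn.Module):
    def __init__(self, text_dim: int, image_dim: int, hidden_dim: int = 256):
        super().__init__()
        self.text_proj = nn.Linear(text_dim, hidden_dim)
        self.image_proj = nn.Linear(image_dim, hidden_dim)
        self.score = nn.Linear(hidden_dim, 1)

    def forward(self, text_repr: torch.Tensor, image_feats: torch.Tensor) -> torch.Tensor:
        """
        text_repr:   (batch, text_dim)      sentence-level text representation
        image_feats: (batch, k, image_dim)  features of k retrieved images
        returns:     (batch, hidden_dim)    text-aware fused visual context
        """
        q = self.text_proj(text_repr).unsqueeze(1)           # (batch, 1, hidden)
        v = self.image_proj(image_feats)                      # (batch, k, hidden)
        scores = self.score(torch.tanh(q + v)).squeeze(-1)    # additive attention scores, (batch, k)
        weights = torch.softmax(scores, dim=-1)               # higher weight = more text-relevant image
        return (weights.unsqueeze(-1) * v).sum(dim=1)         # weighted sum over retrieved images


# Usage: fuse five search-engine-retrieved images with a sentence representation.
if __name__ == "__main__":
    enc = TextAwareVisualEncoder(text_dim=512, image_dim=2048)
    text = torch.randn(2, 512)
    images = torch.randn(2, 5, 2048)
    print(enc(text, images).shape)  # torch.Size([2, 256])
```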