@inproceedings{dutta-chowdhury-etal-2018-multimodal,
  title     = {Multimodal Neural Machine Translation for Low-resource Language Pairs using Synthetic Data},
  author    = {Dutta Chowdhury, Koel and
               Hasanuzzaman, Mohammed and
               Liu, Qun},
  booktitle = {Proceedings of the Workshop on Deep Learning Approaches for Low-Resource {NLP}},
  month     = jul,
  year      = {2018},
  address   = {Melbourne},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W18-3405},
  doi       = {10.18653/v1/W18-3405},
  pages     = {33--42},
  abstract  = {In this paper, we investigate the effectiveness of training a multimodal neural machine translation (MNMT) system with image features for a low-resource language pair, Hindi and English, using synthetic data. A three-way parallel corpus which contains bilingual texts and corresponding images is required to train a MNMT system with image features. However, such a corpus is not available for low resource language pairs. To address this, we developed both a synthetic training dataset and a manually curated development/test dataset for Hindi based on an existing English-image parallel corpus. We used these datasets to build our image description translation system by adopting state-of-the-art MNMT models. Our results show that it is possible to train a MNMT system for low-resource language pairs through the use of synthetic data and that such a system can benefit from image features.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dutta-chowdhury-etal-2018-multimodal">
<titleInfo>
<title>Multimodal Neural Machine Translation for Low-resource Language Pairs using Synthetic Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Koel</namePart>
<namePart type="family">Dutta Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="family">Hasanuzzaman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qun</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Deep Learning Approaches for Low-Resource NLP</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we investigate the effectiveness of training a multimodal neural machine translation (MNMT) system with image features for a low-resource language pair, Hindi and English, using synthetic data. A three-way parallel corpus which contains bilingual texts and corresponding images is required to train a MNMT system with image features. However, such a corpus is not available for low resource language pairs. To address this, we developed both a synthetic training dataset and a manually curated development/test dataset for Hindi based on an existing English-image parallel corpus. We used these datasets to build our image description translation system by adopting state-of-the-art MNMT models. Our results show that it is possible to train a MNMT system for low-resource language pairs through the use of synthetic data and that such a system can benefit from image features.</abstract>
<identifier type="citekey">dutta-chowdhury-etal-2018-multimodal</identifier>
<identifier type="doi">10.18653/v1/W18-3405</identifier>
<location>
<url>https://aclanthology.org/W18-3405</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>33</start>
<end>42</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multimodal Neural Machine Translation for Low-resource Language Pairs using Synthetic Data
%A Dutta Chowdhury, Koel
%A Hasanuzzaman, Mohammed
%A Liu, Qun
%S Proceedings of the Workshop on Deep Learning Approaches for Low-Resource NLP
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne
%F dutta-chowdhury-etal-2018-multimodal
%X In this paper, we investigate the effectiveness of training a multimodal neural machine translation (MNMT) system with image features for a low-resource language pair, Hindi and English, using synthetic data. A three-way parallel corpus which contains bilingual texts and corresponding images is required to train a MNMT system with image features. However, such a corpus is not available for low resource language pairs. To address this, we developed both a synthetic training dataset and a manually curated development/test dataset for Hindi based on an existing English-image parallel corpus. We used these datasets to build our image description translation system by adopting state-of-the-art MNMT models. Our results show that it is possible to train a MNMT system for low-resource language pairs through the use of synthetic data and that such a system can benefit from image features.
%R 10.18653/v1/W18-3405
%U https://aclanthology.org/W18-3405
%U https://doi.org/10.18653/v1/W18-3405
%P 33-42
Markdown (Informal)
[Multimodal Neural Machine Translation for Low-resource Language Pairs using Synthetic Data](https://aclanthology.org/W18-3405) (Dutta Chowdhury et al., 2018)
ACL