@inproceedings{imankulova-etal-2020-towards,
title = "Towards Multimodal Simultaneous Neural Machine Translation",
author = "Imankulova, Aizhan and
Kaneko, Masahiro and
Hirasawa, Tosho and
Komachi, Mamoru",
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wmt-1.70",
pages = "594--603",
abstract = "Simultaneous translation involves translating a sentence before the speaker{'}s utterance is completed in order to realize real-time understanding in multiple languages. This task is significantly more challenging than the general full sentence translation because of the shortage of input information during decoding. To alleviate this shortage, we propose multimodal simultaneous neural machine translation (MSNMT), which leverages visual information as an additional modality. Our experiments with the Multi30k dataset showed that MSNMT significantly outperforms its text-only counterpart in more timely translation situations with low latency. Furthermore, we verified the importance of visual information during decoding by performing an adversarial evaluation of MSNMT, where we studied how models behaved with incongruent input modality and analyzed the effect of different word order between source and target languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="imankulova-etal-2020-towards">
<titleInfo>
<title>Towards Multimodal Simultaneous Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aizhan</namePart>
<namePart type="family">Imankulova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masahiro</namePart>
<namePart type="family">Kaneko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tosho</namePart>
<namePart type="family">Hirasawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamoru</namePart>
<namePart type="family">Komachi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Conference on Machine Translation</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Simultaneous translation involves translating a sentence before the speaker’s utterance is completed in order to realize real-time understanding in multiple languages. This task is significantly more challenging than the general full sentence translation because of the shortage of input information during decoding. To alleviate this shortage, we propose multimodal simultaneous neural machine translation (MSNMT), which leverages visual information as an additional modality. Our experiments with the Multi30k dataset showed that MSNMT significantly outperforms its text-only counterpart in more timely translation situations with low latency. Furthermore, we verified the importance of visual information during decoding by performing an adversarial evaluation of MSNMT, where we studied how models behaved with incongruent input modality and analyzed the effect of different word order between source and target languages.</abstract>
<identifier type="citekey">imankulova-etal-2020-towards</identifier>
<location>
<url>https://aclanthology.org/2020.wmt-1.70</url>
</location>
<part>
<date>2020-nov</date>
<extent unit="page">
<start>594</start>
<end>603</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Multimodal Simultaneous Neural Machine Translation
%A Imankulova, Aizhan
%A Kaneko, Masahiro
%A Hirasawa, Tosho
%A Komachi, Mamoru
%S Proceedings of the Fifth Conference on Machine Translation
%D 2020
%8 nov
%I Association for Computational Linguistics
%C Online
%F imankulova-etal-2020-towards
%X Simultaneous translation involves translating a sentence before the speaker’s utterance is completed in order to realize real-time understanding in multiple languages. This task is significantly more challenging than the general full sentence translation because of the shortage of input information during decoding. To alleviate this shortage, we propose multimodal simultaneous neural machine translation (MSNMT), which leverages visual information as an additional modality. Our experiments with the Multi30k dataset showed that MSNMT significantly outperforms its text-only counterpart in more timely translation situations with low latency. Furthermore, we verified the importance of visual information during decoding by performing an adversarial evaluation of MSNMT, where we studied how models behaved with incongruent input modality and analyzed the effect of different word order between source and target languages.
%U https://aclanthology.org/2020.wmt-1.70
%P 594-603
Markdown (Informal)
[Towards Multimodal Simultaneous Neural Machine Translation](https://aclanthology.org/2020.wmt-1.70) (Imankulova et al., WMT 2020)
ACL