% Paper on the WMT21 biomedical translation task, published in the
% Proceedings of the Sixth Conference on Machine Translation. The editor list
% is that of the proceedings volume, not of this individual paper.
% Accented names use brace-wrapped LaTeX escapes so that classic BibTeX treats
% each accented letter as a single character when sorting and labelling.
% The compound surname Jimeno Yepes is given in "Last, First" form so both
% words stay in the surname.
% The url field uses the canonical ACL Anthology address rather than the
% fix-sig-urls preview build.
@inproceedings{rafieian-costa-jussa-2021-high,
    title     = {High Frequent In-domain Words Segmentation and Forward Translation for the {WMT}21 Biomedical Task},
    author    = {Rafieian, Bardia and
                 Costa-juss{\`a}, Marta R.},
    editor    = {Barrault, Lo{\"i}c and
                 Bojar, Ond{\v{r}}ej and
                 Bougares, Fethi and
                 Chatterjee, Rajen and
                 Costa-juss{\`a}, Marta R. and
                 Federmann, Christian and
                 Fishel, Mark and
                 Fraser, Alexander and
                 Freitag, Markus and
                 Graham, Yvette and
                 Grundkiewicz, Roman and
                 Guzm{\'a}n, Paco and
                 Haddow, Barry and
                 Huck, Matthias and
                 Jimeno Yepes, Antonio and
                 Koehn, Philipp and
                 Kocmi, Tom and
                 Martins, Andr{\'e} and
                 Morishita, Makoto and
                 Monz, Christof},
    booktitle = {Proceedings of the Sixth Conference on Machine Translation},
    month     = nov,
    year      = {2021},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2021.wmt-1.87/},
    pages     = {863--867},
    abstract  = {This paper reports the optimization of using the out-of-domain data in the Biomedical translation task. We firstly optimized our parallel training dataset using the BabelNet in-domain terminology words. Afterward, to increase the training set, we studied the effects of the out-of-domain data on biomedical translation tasks, and we created a mixture of in-domain and out-of-domain training sets and added more in-domain data using forward translation in the English-Spanish task. Finally, with a simple bpe optimization method, we increased the number of in-domain sub-words in our mixed training set and trained the Transformer model on the generated data. Results show improvements using our proposed method.},
}
Markdown (Informal)
[High Frequent In-domain Words Segmentation and Forward Translation for the WMT21 Biomedical Task](https://aclanthology.org/2021.wmt-1.87/) (Rafieian & Costa-jussà, WMT 2021)
ACL