@inproceedings{xie-etal-2021-tentrans,
title = "{T}en{T}rans Large-Scale Multilingual Machine Translation System for {WMT}21",
author = "Xie, Wanying and
Hu, Bojie and
Yang, Han and
Yu, Dong and
Ju, Qi",
editor = "Barrault, Loic and
Bojar, Ondrej and
Bougares, Fethi and
Chatterjee, Rajen and
Costa-jussa, Marta R. and
Federmann, Christian and
Fishel, Mark and
Fraser, Alexander and
Freitag, Markus and
Graham, Yvette and
Grundkiewicz, Roman and
Guzman, Paco and
Haddow, Barry and
Huck, Matthias and
Yepes, Antonio Jimeno and
Koehn, Philipp and
Kocmi, Tom and
Martins, Andre and
Morishita, Makoto and
Monz, Christof",
booktitle = "Proceedings of the Sixth Conference on Machine Translation",
month = nov,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest_wac_2008/2021.wmt-1.53/",
pages = "439--445",
abstract = "This paper describes TenTrans large-scale multilingual machine translation system for WMT 2021. We participate in the Small Track 2 in five South East Asian languages, thirty directions: Javanese, Indonesian, Malay, Tagalog, Tamil, English. We mainly utilized forward/back-translation, in-domain data selection, knowledge distillation, and gradual fine-tuning from the pre-trained model FLORES-101. We find that forward/back-translation significantly improves the translation results, data selection and gradual fine-tuning are particularly effective during adapting domain, while knowledge distillation brings slight performance improvement. Also, model averaging is used to further improve the translation performance based on these systems. Our final system achieves an average BLEU score of 28.89 across thirty directions on the test set."
}