@inproceedings{zhou-etal-2025-transsionmts,
title = "{T}ranssion{MT}{'}s Submission to the {I}ndic {MT} Shared Task in {WMT} 2025",
author = "Zhou, Zebiao and
Li, Hui and
Zhu, Xiangxun and
Liu, Kangzhen",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Tenth Conference on Machine Translation",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.wmt-1.106/",
pages = "1271--1275",
ISBN = "979-8-89176-341-8",
abstract = "This study addresses the low-resource Indian lan- 002guage translation task (English Assamese, English Ma- 003nipuri) at WMT 2025, proposing a cross-iterative back- 004translation and data augmentation approach based on 005dual pre-trained models to enhance translation perfor- 006mance in low-resource scenarios. The research method- 007ology primarily encompasses four aspects: (1) Utilizing 008open-source pre-trained models IndicTrans2{\_}1B and 009NLLB{\_}3.3B, fine-tuning them on official bilingual data, 010followed by alternating back-translation and incremen- 011tal training to generate high-quality pseudo-parallel cor- 012pora and optimize model parameters through multiple 013iterations; (2) Employing the open-source semantic sim- 014ilarity model (all-mpnet-base-v2) to filter monolingual 015sentences with low semantic similarity to the test set 016from open-source corpora such as NLLB and BPCC, 017thereby improving the relevance of monolingual data 018to the task; (3) Cleaning the training data, including 019removing URL and HTML format content, eliminating 020untranslated sentences in back-translation, standardiz- 021ing symbol formats, and normalizing capitalization of 022the first letter; (4) During the model inference phase, 023combining the outputs generated by the fine-tuned In- 024dicTrans2{\_}1B and NLLB3.3B"
}

Markdown (Informal)
[TranssionMT’s Submission to the Indic MT Shared Task in WMT 2025](https://preview.aclanthology.org/ingest-emnlp/2025.wmt-1.106/) (Zhou et al., WMT 2025)
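
The abstract's second step, filtering monolingual data by semantic similarity to the test set with all-mpnet-base-v2, can be sketched with the sentence-transformers library. This is a minimal illustration only: the similarity threshold, the max-over-test-set criterion, and the example sentences below are assumptions, not values or details reported by the authors.

```python
# Illustrative sketch of similarity-based monolingual filtering with
# all-mpnet-base-v2 (sentence-transformers). Keeps only monolingual sentences
# whose best cosine similarity to any test-set sentence clears a threshold.
# The threshold of 0.5 is an assumption, not the paper's setting.
from sentence_transformers import SentenceTransformer, util


def filter_monolingual(mono_sentences, test_sentences, threshold=0.5):
    model = SentenceTransformer("all-mpnet-base-v2")
    # Encode both sides; normalized embeddings make cosine similarity a dot product.
    test_emb = model.encode(test_sentences, convert_to_tensor=True, normalize_embeddings=True)
    mono_emb = model.encode(mono_sentences, convert_to_tensor=True, normalize_embeddings=True)
    # For each monolingual sentence, take its highest similarity against the test set.
    best_sims = util.cos_sim(mono_emb, test_emb).max(dim=1).values
    return [s for s, sim in zip(mono_sentences, best_sims) if sim.item() >= threshold]


if __name__ == "__main__":
    mono = ["It rained heavily across Assam yesterday.", "Buy one, get one free!"]
    test = ["Flooding was reported in several districts of Assam."]
    print(filter_monolingual(mono, test))
```

A sketch like this selects task-relevant monolingual sentences from large corpora (e.g., NLLB, BPCC) before back-translation; the paper itself does not specify the exact scoring or cutoff used.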