@comment{ACL Anthology record 2024.wmt-1.61 (WMT 2024). The url field uses the canonical Anthology address rather than a preview-branch build.}
@inproceedings{yadav-etal-2024-a3,
  title     = {{A3-108} Controlling Token Generation in Low Resource Machine Translation Systems},
  author    = {Yadav, Saumitra and
               Mukherjee, Ananya and
               Shrivastava, Manish},
  editor    = {Haddow, Barry and
               Kocmi, Tom and
               Koehn, Philipp and
               Monz, Christof},
  booktitle = {Proceedings of the Ninth Conference on Machine Translation},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.wmt-1.61/},
  doi       = {10.18653/v1/2024.wmt-1.61},
  pages     = {728--734},
  abstract  = {Translating for languages with limited resources poses a persistent challenge due to the scarcity of high-quality training data. To enhance translation accuracy, we explored controlled generation mechanisms, focusing on the importance of control tokens. In our experiments, while training, we encoded the target sentence length as a control token to the source sentence, treating it as an additional feature for the source sentence. We developed various NMT models using transformer architecture and conducted experiments across 8 language directions (English {$\leftrightarrow$} Assamese, Manipuri, Khasi, and Mizo), exploring four variations of length encoding mechanisms. Through comparative analysis against the baseline model, we submitted two systems for each language direction. We report our findings for the same in this work.},
}
Markdown (Informal)
[A3-108 Controlling Token Generation in Low Resource Machine Translation Systems](https://aclanthology.org/2024.wmt-1.61/) (Yadav et al., WMT 2024)
ACL