@inproceedings{barua-etal-2024-using,
title = "Using Language Models to Disambiguate Lexical Choices in Translation",
author = "Barua, Josh and
Subramanian, Sanjay and
Yin, Kayo and
Suhr, Alane",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.emnlp-main.278/",
doi = "10.18653/v1/2024.emnlp-main.278",
pages = "4837--4848",
abstract = "In translation, a concept represented by a single word in a source language can have multiple variations in a target language. The task of lexical selection requires using context to identify which variation is most appropriate for a source text. We work with native speakers of nine languages to create DTAiLS, a dataset of 1,377 sentence pairs that exhibit cross-lingual concept variation when translating from English. We evaluate recent LLMs and neural machine translation systems on DTAiLS, with the best-performing model, GPT-4, achieving from 67 to 85{\%} accuracy across languages. Finally, we use language models to generate English rules describing target-language concept variations. Providing weaker models with high-quality lexical rules improves accuracy substantially, in some cases reaching or outperforming GPT-4."
}