% Conference paper, ACL Anthology record 2023.emnlp-main.933.
% The url field holds the permanent Anthology address (same ID as the DOI),
% not a preview/staging build of the Anthology site.
@inproceedings{liu-etal-2023-crossing,
  title     = {Crossing the Threshold: Idiomatic Machine Translation through Retrieval Augmentation and Loss Weighting},
  author    = {Liu, Emmy and
               Chaudhary, Aditi and
               Neubig, Graham},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.emnlp-main.933/},
  doi       = {10.18653/v1/2023.emnlp-main.933},
  pages     = {15095--15111},
  abstract  = {Idioms are common in everyday language, but often pose a challenge to translators because their meanings do not follow from the meanings of their parts. Despite significant advances, machine translation systems still struggle to translate idiomatic expressions. We provide a simple characterization of idiomatic translation and related issues. This allows us to conduct a synthetic experiment revealing a tipping point at which transformer-based machine translation models correctly default to idiomatic translations. To expand multilingual resources, we compile a dataset of {\textasciitilde}4k natural sentences containing idiomatic expressions in French, Finnish, and Japanese. To improve translation of natural idioms, we introduce two straightforward yet effective techniques: the strategic upweighting of training loss on potentially idiomatic sentences, and using retrieval-augmented models. This not only improves the accuracy of a strong pretrained MT model on idiomatic sentences by up to 13{\%} in absolute accuracy, but also holds potential benefits for non-idiomatic sentences.},
}
Markdown (Informal)
[Crossing the Threshold: Idiomatic Machine Translation through Retrieval Augmentation and Loss Weighting](https://aclanthology.org/2023.emnlp-main.933/) (Liu et al., EMNLP 2023)
ACL