@inproceedings{raunak-etal-2024-instruction,
title = "On Instruction-Finetuning Neural Machine Translation Models",
author = "Raunak, Vikas and
Grundkiewicz, Roman and
Junczys-Dowmunt, Marcin",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Ninth Conference on Machine Translation",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.wmt-1.114/",
doi = "10.18653/v1/2024.wmt-1.114",
pages = "1155--1166",
    abstract = "In this work, we introduce instruction finetuning for Neural Machine Translation (NMT) models, which distills instruction following capabilities from Large Language Models (LLMs) into orders-of-magnitude smaller NMT models. Our instruction-finetuning recipe for NMT models enables customization of translations for a limited but disparate set of translation-specific tasks. We show that NMT models are capable of following multiple instructions simultaneously and demonstrate capabilities of zero-shot composition of instructions. We also show that through instruction finetuning, traditionally disparate tasks such as formality-controlled machine translation, multi-domain adaptation as well as multi-modal translations can be tackled jointly by a single instruction-finetuned NMT model, at a performance level comparable to LLMs such as GPT-3.5-Turbo. To the best of our knowledge, our work is among the first to demonstrate the instruction-following capabilities of traditional NMT models, which allows for faster, cheaper and more efficient serving of customized translations."
}