% EAMT 2011 conference paper (ACL Anthology ID 2011.eamt-1.18).
% The citation key is left unchanged so that existing \cite commands keep working.
% The day range is carried in the month field via concatenation with the "may" macro.
@inproceedings{q-etal-2011-deriving,
    title     = "Deriving translation units using small additional corpora",
    author    = "Henr{\'i}quez Q., Carlos A. and
                 Mari{\~n}o, Jos{\'e} B. and
                 Banchs, Rafael E.",
    editor    = "Forcada, Mikel L. and
                 Depraetere, Heidi and
                 Vandeghinste, Vincent",
    booktitle = "Proceedings of the 15th Annual Conference of the European Association for Machine Translation",
    month     = may # " 30--31",
    year      = "2011",
    address   = "Leuven, Belgium",
    publisher = "European Association for Machine Translation",
    url       = "https://aclanthology.org/2011.eamt-1.18/",
    abstract  = "We present a novel strategy to derive new translation units using an additional bilingual corpus and a previously trained SMT system. The units were used to adapt the SMT system. The derivation process can be applied when the additional corpus is very small compared with the original train corpus and it does not require to compute new word alignments using all corpora. The strategy is based in the Levenshtein Distance and its resulting path. We reported a statistically significant improvement, with a confidence level of 99{\%}, when adapting an Ngram-based Catalan-Spanish system using an additional corpus that represents less than 0.5{\%} of the original train corpus. The additional translation units were able to solve morphological and lexical errors and added previously unknown words to the vocabulary."
}

Markdown (Informal)
[Deriving translation units using small additional corpora](https://aclanthology.org/2011.eamt-1.18/) (Henríquez Q. et al., EAMT 2011)
ACL
- Carlos A. Henríquez Q., José B. Mariño, and Rafael E. Banchs. 2011. Deriving translation units using small additional corpora. In Proceedings of the 15th Annual Conference of the European Association for Machine Translation, Leuven, Belgium. European Association for Machine Translation.