% IWSLT 2010 workshop paper on German-side preprocessing for German-English SMT.
% The workshop days are appended to the month macro by string concatenation.
% NOTE(review): address holds the value supplied by the export, presumably the
% workshop venue rather than a publisher city -- confirm before moving it.
@inproceedings{el-kahlout-yvon-2010-pay,
  author    = {El-Kahlout, Ilknur Durgar and
               Yvon, Francois},
  title     = {The pay-offs of preprocessing for {German}-{English} statistical machine translation},
  booktitle = {Proceedings of the 7th International Workshop on Spoken Language Translation: Papers},
  month     = dec # { 2--3},
  year      = {2010},
  address   = {Paris, France},
  pages     = {251--258},
  url       = {https://aclanthology.org/2010.iwslt-papers.6/},
  abstract  = {In this paper, we present the result of our work on improving the preprocessing for German-English statistical machine translation. We implemented and tested various improvements aimed at i) converting German texts to the new orthographic conventions; ii) performing a new tokenization for German; iii) normalizing lexical redundancy with the help of POS tagging and morphological analysis; iv) splitting German compound words with frequency based algorithm and; v) reducing singletons and out-of-vocabulary words. All these steps are performed during preprocessing on the German side. Combining all these processes, we reduced 10\% of the singletons, 2\% OOV words, and obtained 1.5 absolute (7\% relative) BLEU improvement on the WMT 2010 German to English News translation task.},
}
Markdown (Informal)
[The pay-offs of preprocessing for German-English statistical machine translation](https://aclanthology.org/2010.iwslt-papers.6/) (El-Kahlout & Yvon, IWSLT 2010)
ACL