% Eisele and Chen, LREC 2010: the MultiUN parallel corpus paper.
% The url field is the permanent ACL Anthology record (ID L10-1473), not a preview-branch build.
% Title braces protect whole words so sentence-casing styles keep "MultiUN" and "United Nation" intact.
@inproceedings{eisele-chen-2010-multiun,
  title     = {{MultiUN}: A Multilingual Corpus from {United Nation} Documents},
  author    = {Eisele, Andreas and Chen, Yu},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Rosner, Mike and Tapias, Daniel},
  booktitle = {Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)},
  month     = may,
  year      = {2010},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  url       = {https://aclanthology.org/L10-1473/},
  abstract  = {This paper describes the acquisition, preparation and properties of a corpus extracted from the official documents of the United Nations (UN). This corpus is available in all 6 official languages of the UN, consisting of around 300 million words per language. We describe the methods we used for crawling, document formatting, and sentence alignment. This corpus also includes a common test set for machine translation. We present the results of a French-Chinese machine translation experiment performed on this corpus.},
}
Markdown (Informal)
[MultiUN: A Multilingual Corpus from United Nation Documents](https://aclanthology.org/L10-1473/) (Eisele & Chen, LREC 2010)
ACL