@comment{LREC 2008 conference paper, ACL Anthology ID L08-1060. The braces in the title keep the system name capitalised under sentence-casing styles.}
@inproceedings{mohler-mihalcea-2008-babylon,
    title     = "{Babylon Parallel Text Builder}: Gathering Parallel Texts for Low-Density Languages",
    author    = "Mohler, Michael and
                 Mihalcea, Rada",
    editor    = "Calzolari, Nicoletta and
                 Choukri, Khalid and
                 Maegaard, Bente and
                 Mariani, Joseph and
                 Odijk, Jan and
                 Piperidis, Stelios and
                 Tapias, Daniel",
    booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
    month     = may,
    year      = "2008",
    address   = "Marrakech, Morocco",
    publisher = "European Language Resources Association (ELRA)",
    url       = "https://aclanthology.org/L08-1060/",
    abstract  = "This paper describes Babylon, a system that attempts to overcome the shortage of parallel texts in low-density languages by supplementing existing parallel texts with texts gathered automatically from the Web. In addition to the identification of entire Web pages, we also propose a new feature specifically designed to find parallel text chunks within a single document. Experiments carried out on the Quechua-Spanish language pair show that the system is successful in automatically identifying a significant amount of parallel texts on the Web. Evaluations of a machine translation system trained on this corpus indicate that the Web-gathered parallel texts can supplement manually compiled parallel texts and perform significantly better than the manually compiled texts when tested on other Web-gathered data."
}
Markdown (Informal)
[Babylon Parallel Text Builder: Gathering Parallel Texts for Low-Density Languages](https://aclanthology.org/L08-1060/) (Mohler & Mihalcea, LREC 2008)
ACL