@comment{
  LREC 2006 conference paper; ACL Anthology ID L06-1158.
  The url field holds the canonical Anthology address rather than a
  temporary preview-branch build, which is not a stable citation target.
  NOTE(review): the second author's given name is spelled "Magdelena" in
  this record; the usual Czech spelling is "Magdalena". Verify against the
  paper PDF before changing it.
}
@inproceedings{bojar-prokopova-2006-czech,
  title     = {{Czech}-{English} Word Alignment},
  author    = {Bojar, Ond{\v{r}}ej and
               Prokopov{\'a}, Magdelena},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Gangemi, Aldo and
               Maegaard, Bente and
               Mariani, Joseph and
               Odijk, Jan and
               Tapias, Daniel},
  booktitle = {Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}'06)},
  month     = may,
  year      = {2006},
  address   = {Genoa, Italy},
  publisher = {European Language Resources Association (ELRA)},
  url       = {https://aclanthology.org/L06-1158/},
  abstract  = {We describe an experiment with Czech-English word alignment. Half a thousand sentences were manually annotated by two annotators in parallel and the most frequent reasons for disagreement are described. We evaluate the accuracy of GIZA++ alignment toolkit on the data and identify that lemmatization of the Czech part can reduce alignment error to a half. Furthermore we document that about 38{\%} of tokens difficult for GIZA++ were difficult for humans already.},
}
Markdown (Informal)
[Czech-English Word Alignment](https://aclanthology.org/L06-1158/) (Bojar & Prokopová, LREC 2006)
ACL
- Ondřej Bojar and Magdelena Prokopová. 2006. Czech-English Word Alignment. In Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06), Genoa, Italy. European Language Resources Association (ELRA).