% Sennrich and Volk, "MT-based Sentence Alignment for OCR-generated Parallel
% Texts", from the AMTA 2010 research-papers proceedings.
% - month joins the predefined oct/nov macros with literal day numbers to give
%   the date range October 31 to November 4 (the "--" typesets as an en dash).
% - url is the permanent ACL Anthology record, not a temporary preview build.
@inproceedings{sennrich-volk-2010-mt,
    title     = "{MT}-based Sentence Alignment for {OCR}-generated Parallel Texts",
    author    = "Sennrich, Rico and
                 Volk, Martin",
    booktitle = "Proceedings of the 9th Conference of the Association for Machine Translation in the Americas: Research Papers",
    month     = oct # " 31--" # nov # " 4",
    year      = "2010",
    address   = "Denver, Colorado, USA",
    publisher = "Association for Machine Translation in the Americas",
    url       = "https://aclanthology.org/2010.amta-papers.14/",
    abstract  = "The performance of current sentence alignment tools varies according to the to-be-aligned texts. We have found existing tools unsuitable for hard-to-align parallel texts and describe an alternative alignment algorithm. The basic idea is to use machine translations of a text and BLEU as a similarity score to find reliable alignments which are used as anchor points. The gaps between these anchor points are then filled using BLEU-based and length-based heuristics. We show that this approach outperforms state-of-the-art algorithms in our alignment task, and that this improvement in alignment quality translates into better SMT performance. Furthermore, we show that even length-based alignment algorithms profit from having a machine translation as a point of comparison."
}
Markdown (Informal)
[MT-based Sentence Alignment for OCR-generated Parallel Texts](https://aclanthology.org/2010.amta-papers.14/) (Sennrich & Volk, AMTA 2010)
ACL