@inproceedings{marchisio-etal-2022-embedding,
title = "Embedding-Enhanced {GIZA}++: Improving Low-Resource Word Alignment Using Embeddings",
author = "Marchisio, Kelly and
Xiong, Conghao and
Koehn, Philipp",
editor = "Duh, Kevin and
Guzm{\'a}n, Francisco",
booktitle = "Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)",
month = sep,
year = "2022",
address = "Orlando, USA",
publisher = "Association for Machine Translation in the Americas",
    url = "https://aclanthology.org/2022.amta-research.20/",
pages = "264--273",
    abstract = "A popular natural language processing task decades ago, word alignment has been dominated until recently by GIZA++, a statistical method based on the 30-year-old IBM models. New methods that outperform GIZA++ primarily rely on large machine translation models, massively multilingual language models, or supervision from GIZA++ alignments themselves. We introduce Embedding-Enhanced GIZA++, and outperform GIZA++ without any of the aforementioned factors. Taking advantage of monolingual embedding spaces of the source and target language only, we exceed GIZA++{'}s performance in every tested scenario for three language pairs. In the lowest-resource setting, we outperform GIZA++ by 8.5, 10.9, and 12 AER for Ro-En, De-En, and En-Fr, respectively. We release our code at www.blind-review.code."
}